xref: /openbmc/qemu/target/i386/tcg/translate.c (revision 7327813d)
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

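/*
 * Worked example: the ModRM byte 0x65 decomposes as mod=01 (bits 7:6),
 * op=4 (bits 5:3) and rm=5 (bits 2:0), so it is matched by
 * CASE_MODRM_MEM_OP(4).  A mod=11 byte such as 0xe5 encodes a register
 * rather than a memory operand, and is matched only by CASE_MODRM_OP(4).
 */
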
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need neither sprinkle
 * ifdefs through the translator nor provide the helper functions.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

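/*
 * Example of the lazy-flags scheme the table above supports: a SUB leaves
 * the result in cpu_cc_dst, the subtrahend in cpu_cc_src and the original
 * minuend in cc_srcT, so CF can later be recomputed on demand as
 * (minuend < subtrahend) without ever materializing EFLAGS eagerly.
 */
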
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}

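/*
 * Example: register number 4 names AH in legacy byte encodings, but SPL
 * once any REX prefix is present, so the register number alone cannot
 * distinguish the two cases.
 */
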
/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

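/*
 * Example: opcode 0xec (IN AL,DX) has bit 0 clear and so uses MO_8, while
 * 0xed uses the operand size capped at MO_32, since x86 port I/O has no
 * 64-bit access width.
 */
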
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

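/*
 * Example: with a 16-bit address size, an effective address of 0xffff + 2
 * wraps to 0x0001 before the segment base is added, matching real-mode
 * segment:offset arithmetic.
 */
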
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP or a VMM exit if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}

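/*
 * Callers are expected to bail out when this returns false; a sketch of a
 * typical (hypothetical) use in an IN emitter would be:
 *
 *     if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
 *         break;
 *     }
 *
 * so that no I/O helper call is emitted for an access that raised #GP.
 */
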
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

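/*
 * A CCPrepare describes a flag as "compare .reg (with .reg2 or .imm) under
 * .cond, after masking with .mask", so consumers can emit either a setcond
 * or a brcond directly instead of materializing all of EFLAGS first.
 */
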
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

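/*
 * Example of the single-bit fast path above: ZF is CC_Z = bit 6 of the
 * computed flags, so SETZ becomes a shift right by 6 and an AND with 1
 * instead of a full setcond.
 */
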
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

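/*
 * Shape of the code generated by the macros above for e.g. REP MOVS
 * (sketch): if ECX == 0, jump to the next instruction; perform one string
 * iteration; decrement ECX; optionally re-test ECX when repz_opt allows;
 * finally jump back to the current instruction so the loop re-enters
 * through the translator.
 */
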
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

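/*
 * Unlike ADD/SUB, INC and DEC leave CF untouched, so the old carry is
 * computed into cpu_cc_src above before CC_OP is switched to the INC/DEC
 * variants, whose flag recomputation then merges the preserved CF with
 * the flags derived from the new value.
 */
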
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we must
       not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

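/*
 * The movcond dance above exists because a shift by a count of zero must
 * leave the flags (and CC_OP) unchanged on x86; selecting between the old
 * and new values avoids emitting a branch for that case.
 */
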
1569 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570                             int is_right, int is_arith)
1571 {
1572     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573 
1574     /* load */
1575     if (op1 == OR_TMP0) {
1576         gen_op_ld_v(s, ot, s->T0, s->A0);
1577     } else {
1578         gen_op_mov_v_reg(s, ot, s->T0, op1);
1579     }
1580 
1581     tcg_gen_andi_tl(s->T1, s->T1, mask);
1582     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583 
1584     if (is_right) {
1585         if (is_arith) {
1586             gen_exts(ot, s->T0);
1587             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589         } else {
1590             gen_extu(ot, s->T0);
1591             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593         }
1594     } else {
1595         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597     }
1598 
1599     /* store */
1600     gen_op_st_rm_T0_A0(s, ot, op1);
1601 
1602     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603 }
1604 
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                             int is_right, int is_arith)
1607 {
1608     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609 
1610     /* load */
1611     if (op1 == OR_TMP0)
1612         gen_op_ld_v(s, ot, s->T0, s->A0);
1613     else
1614         gen_op_mov_v_reg(s, ot, s->T0, op1);
1615 
1616     op2 &= mask;
1617     if (op2 != 0) {
1618         if (is_right) {
1619             if (is_arith) {
1620                 gen_exts(ot, s->T0);
1621                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623             } else {
1624                 gen_extu(ot, s->T0);
1625                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627             }
1628         } else {
1629             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630             tcg_gen_shli_tl(s->T0, s->T0, op2);
1631         }
1632     }
1633 
1634     /* store */
1635     gen_op_st_rm_T0_A0(s, ot, op1);
1636 
1637     /* update eflags if non zero shift */
1638     if (op2 != 0) {
1639         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642     }
1643 }
1644 
1645 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646 {
1647     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648     TCGv_i32 t0, t1;
1649 
1650     /* load */
1651     if (op1 == OR_TMP0) {
1652         gen_op_ld_v(s, ot, s->T0, s->A0);
1653     } else {
1654         gen_op_mov_v_reg(s, ot, s->T0, op1);
1655     }
1656 
1657     tcg_gen_andi_tl(s->T1, s->T1, mask);
1658 
1659     switch (ot) {
1660     case MO_8:
1661         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1662         tcg_gen_ext8u_tl(s->T0, s->T0);
1663         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664         goto do_long;
1665     case MO_16:
1666         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1667         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668         goto do_long;
1669     do_long:
1670 #ifdef TARGET_X86_64
1671     case MO_32:
1672         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674         if (is_right) {
1675             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676         } else {
1677             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678         }
1679         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680         break;
1681 #endif
1682     default:
1683         if (is_right) {
1684             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685         } else {
1686             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687         }
1688         break;
1689     }
1690 
1691     /* store */
1692     gen_op_st_rm_T0_A0(s, ot, op1);
1693 
1694     /* We'll need the flags computed into CC_SRC.  */
1695     gen_compute_eflags(s);
1696 
1697     /* The value that was "rotated out" is now present at the other end
1698        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1699        since we've computed the flags into CC_SRC, these variables are
1700        currently dead.  */
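    /* Illustrative example: for MO_8 the input was replicated to 32 bits
       above, so sampling the "MSB" at bit position 31 (= mask) really
       reads bit 7 of the 8-bit result.  E.g. after "rol $1" on an input
       of 0x81 the result is 0x03, giving C = bit 0 = 1 and
       O = C ^ MSB = 1 ^ 0 = 1, which is what the code below computes.  */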
1701     if (is_right) {
1702         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705     } else {
1706         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708     }
1709     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1711 
1712     /* Now conditionally store the new CC_OP value.  If the shift count
1713        is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1714        Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1715        exactly as we computed above.  */
1716     t0 = tcg_const_i32(0);
1717     t1 = tcg_temp_new_i32();
1718     tcg_gen_trunc_tl_i32(t1, s->T1);
1719     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722                         s->tmp2_i32, s->tmp3_i32);
1723     tcg_temp_free_i32(t0);
1724     tcg_temp_free_i32(t1);
1725 
1726     /* The CC_OP value is no longer predictable.  */
1727     set_cc_op(s, CC_OP_DYNAMIC);
1728 }
1729 
1730 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731                           int is_right)
1732 {
1733     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734     int shift;
1735 
1736     /* load */
1737     if (op1 == OR_TMP0) {
1738         gen_op_ld_v(s, ot, s->T0, s->A0);
1739     } else {
1740         gen_op_mov_v_reg(s, ot, s->T0, op1);
1741     }
1742 
1743     op2 &= mask;
1744     if (op2 != 0) {
1745         switch (ot) {
1746 #ifdef TARGET_X86_64
1747         case MO_32:
1748             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749             if (is_right) {
1750                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751             } else {
1752                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753             }
1754             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755             break;
1756 #endif
1757         default:
1758             if (is_right) {
1759                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760             } else {
1761                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762             }
1763             break;
1764         case MO_8:
1765             mask = 7;
1766             goto do_shifts;
1767         case MO_16:
1768             mask = 15;
1769         do_shifts:
1770             shift = op2 & mask;
1771             if (is_right) {
1772                 shift = mask + 1 - shift;
1773             }
1774             gen_extu(ot, s->T0);
1775             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778             break;
1779         }
1780     }
1781 
1782     /* store */
1783     gen_op_st_rm_T0_A0(s, ot, op1);
1784 
1785     if (op2 != 0) {
1786         /* Compute the flags into CC_SRC.  */
1787         gen_compute_eflags(s);
1788 
1789         /* The value that was "rotated out" is now present at the other end
1790            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1791            since we've computed the flags into CC_SRC, these variables are
1792            currently dead.  */
1793         if (is_right) {
1794             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797         } else {
1798             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800         }
1801         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803         set_cc_op(s, CC_OP_ADCOX);
1804     }
1805 }
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862 
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                              bool is_right, TCGv count_in)
1866 {
1867     target_ulong mask = (ot == MO_64 ? 63 : 31);
1868     TCGv count;
1869 
1870     /* load */
1871     if (op1 == OR_TMP0) {
1872         gen_op_ld_v(s, ot, s->T0, s->A0);
1873     } else {
1874         gen_op_mov_v_reg(s, ot, s->T0, op1);
1875     }
1876 
1877     count = tcg_temp_new();
1878     tcg_gen_andi_tl(count, count_in, mask);
1879 
1880     switch (ot) {
1881     case MO_16:
1882         /* Note: we implement the Intel behaviour for shift count > 16.
1883            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884            portion by constructing it as a 32-bit value.  */
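        /* Illustration: with C = 20 the 16-bit result is bits 20..35 of
           the 48-bit A:B:A value, i.e. the high bits wrap around into A
           again; widening B:A to 32 bits lets the paths below handle
           such counts above 16.  */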
1885         if (is_right) {
1886             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887             tcg_gen_mov_tl(s->T1, s->T0);
1888             tcg_gen_mov_tl(s->T0, s->tmp0);
1889         } else {
1890             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891         }
1892         /*
1893          * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1894          * otherwise fall through to the default case.
1895          */
1896     case MO_32:
1897 #ifdef TARGET_X86_64
1898         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899         tcg_gen_subi_tl(s->tmp0, count, 1);
1900         if (is_right) {
1901             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903             tcg_gen_shr_i64(s->T0, s->T0, count);
1904         } else {
1905             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907             tcg_gen_shl_i64(s->T0, s->T0, count);
1908             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909             tcg_gen_shri_i64(s->T0, s->T0, 32);
1910         }
1911         break;
1912 #endif
1913     default:
1914         tcg_gen_subi_tl(s->tmp0, count, 1);
1915         if (is_right) {
1916             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917 
1918             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919             tcg_gen_shr_tl(s->T0, s->T0, count);
1920             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921         } else {
1922             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923             if (ot == MO_16) {
1924                 /* Only needed if count > 16, for Intel behaviour.  */
1925                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928             }
1929 
1930             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931             tcg_gen_shl_tl(s->T0, s->T0, count);
1932             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933         }
1934         tcg_gen_movi_tl(s->tmp4, 0);
1935         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                            s->tmp4, s->T1);
1937         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938         break;
1939     }
1940 
1941     /* store */
1942     gen_op_st_rm_T0_A0(s, ot, op1);
1943 
1944     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945     tcg_temp_free(count);
1946 }
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
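/* The architectural x86 limit: an instruction may not exceed 15 bytes;
   anything longer raises #GP.  advance_pc() below enforces this.  */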
2005 #define X86_MAX_INSN_LENGTH 15
2006 
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009     uint64_t pc = s->pc;
2010 
2011     s->pc += num_bytes;
2012     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013         /* If the instruction's 16th byte is on a different page than the 1st, a
2014          * page fault on the second page wins over the general protection fault
2015          * caused by the instruction being too long.
2016          * This can happen even if the operand is only one byte long!
2017          */
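        /* Illustration: an instruction starting 14 bytes before a page
         * boundary crosses onto the next page before reaching its 16th
         * byte, so the byte probed below lies on that page and a #PF
         * there takes priority; the loaded value itself is discarded.
         */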
2018         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019             volatile uint8_t unused =
2020                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021             (void) unused;
2022         }
2023         siglongjmp(s->jmpbuf, 1);
2024     }
2025 
2026     return pc;
2027 }
2028 
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031     return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2032 }
2033 
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036     return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
2037 }
2038 
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2042 }
2043 
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046     return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2047 }
2048 
2049 #ifdef TARGET_X86_64
2050 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2051 {
2052     return translator_ldq(env, &s->base, advance_pc(env, s, 8));
2053 }
2054 #endif
2055 
2056 /* Decompose an address.  */
2057 
2058 typedef struct AddressParts {
2059     int def_seg;
2060     int base;
2061     int index;
2062     int scale;
2063     target_long disp;
2064 } AddressParts;
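
/* As an illustration, an operand like "[ebx + esi*4 + 0x10]" with no
   segment override decomposes to { R_DS, R_EBX, R_ESI, 2, 0x10 }: the
   scale is stored as a log2.  A base of -1 means no base register, and
   -2 marks a RIP-relative address (see gen_lea_modrm_0 below).  */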
2065 
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                     int modrm)
2068 {
2069     int def_seg, base, index, scale, mod, rm;
2070     target_long disp;
2071     bool havesib;
2072 
2073     def_seg = R_DS;
2074     index = -1;
2075     scale = 0;
2076     disp = 0;
2077 
2078     mod = (modrm >> 6) & 3;
2079     rm = modrm & 7;
2080     base = rm | REX_B(s);
2081 
2082     if (mod == 3) {
2083         /* Normally filtered out earlier, but including this path
2084            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085         goto done;
2086     }
2087 
2088     switch (s->aflag) {
2089     case MO_64:
2090     case MO_32:
2091         havesib = 0;
2092         if (rm == 4) {
2093             int code = x86_ldub_code(env, s);
2094             scale = (code >> 6) & 3;
2095             index = ((code >> 3) & 7) | REX_X(s);
2096             if (index == 4) {
2097                 index = -1;  /* no index */
2098             }
2099             base = (code & 7) | REX_B(s);
2100             havesib = 1;
2101         }
2102 
2103         switch (mod) {
2104         case 0:
2105             if ((base & 7) == 5) {
2106                 base = -1;
2107                 disp = (int32_t)x86_ldl_code(env, s);
2108                 if (CODE64(s) && !havesib) {
2109                     base = -2;
2110                     disp += s->pc + s->rip_offset;
2111                 }
2112             }
2113             break;
2114         case 1:
2115             disp = (int8_t)x86_ldub_code(env, s);
2116             break;
2117         default:
2118         case 2:
2119             disp = (int32_t)x86_ldl_code(env, s);
2120             break;
2121         }
2122 
2123         /* For correct popl handling with esp.  */
2124         if (base == R_ESP && s->popl_esp_hack) {
2125             disp += s->popl_esp_hack;
2126         }
2127         if (base == R_EBP || base == R_ESP) {
2128             def_seg = R_SS;
2129         }
2130         break;
2131 
2132     case MO_16:
2133         if (mod == 0) {
2134             if (rm == 6) {
2135                 base = -1;
2136                 disp = x86_lduw_code(env, s);
2137                 break;
2138             }
2139         } else if (mod == 1) {
2140             disp = (int8_t)x86_ldub_code(env, s);
2141         } else {
2142             disp = (int16_t)x86_lduw_code(env, s);
2143         }
2144 
2145         switch (rm) {
2146         case 0:
2147             base = R_EBX;
2148             index = R_ESI;
2149             break;
2150         case 1:
2151             base = R_EBX;
2152             index = R_EDI;
2153             break;
2154         case 2:
2155             base = R_EBP;
2156             index = R_ESI;
2157             def_seg = R_SS;
2158             break;
2159         case 3:
2160             base = R_EBP;
2161             index = R_EDI;
2162             def_seg = R_SS;
2163             break;
2164         case 4:
2165             base = R_ESI;
2166             break;
2167         case 5:
2168             base = R_EDI;
2169             break;
2170         case 6:
2171             base = R_EBP;
2172             def_seg = R_SS;
2173             break;
2174         default:
2175         case 7:
2176             base = R_EBX;
2177             break;
2178         }
2179         break;
2180 
2181     default:
2182         tcg_abort();
2183     }
2184 
2185  done:
2186     return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
2188 
2189 /* Compute the address, with a minimum number of TCG ops.  */
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192     TCGv ea = NULL;
2193 
2194     if (a.index >= 0) {
2195         if (a.scale == 0) {
2196             ea = cpu_regs[a.index];
2197         } else {
2198             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199             ea = s->A0;
2200         }
2201         if (a.base >= 0) {
2202             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203             ea = s->A0;
2204         }
2205     } else if (a.base >= 0) {
2206         ea = cpu_regs[a.base];
2207     }
2208     if (!ea) {
2209         tcg_gen_movi_tl(s->A0, a.disp);
2210         ea = s->A0;
2211     } else if (a.disp != 0) {
2212         tcg_gen_addi_tl(s->A0, ea, a.disp);
2213         ea = s->A0;
2214     }
2215 
2216     return ea;
2217 }
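
/* E.g. a plain base-register address returns cpu_regs[base] directly and
   emits no TCG ops at all; only scaled-index, base+index or displacement
   forms compute into s->A0.  */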
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230 
2231 /* Used for BNDCL, BNDCU, BNDCN.  */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                       TCGCond cond, TCGv_i64 bndv)
2234 {
2235     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236 
2237     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238     if (!CODE64(s)) {
2239         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240     }
2241     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243     gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245 
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251 
2252 /* Generate a modrm memory load or store of 'reg'.  TMP0 is used if
2253    reg == OR_TMP0. */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287     uint32_t ret;
2288 
2289     switch (ot) {
2290     case MO_8:
2291         ret = x86_ldub_code(env, s);
2292         break;
2293     case MO_16:
2294         ret = x86_lduw_code(env, s);
2295         break;
2296     case MO_32:
2297 #ifdef TARGET_X86_64
2298     case MO_64:
2299 #endif
2300         ret = x86_ldl_code(env, s);
2301         break;
2302     default:
2303         tcg_abort();
2304     }
2305     return ret;
2306 }
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
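
/* Note: even MO_64 operations carry at most a 4-byte immediate, which is
   sign-extended at run time; the one exception, MOV r64, imm64, does not
   go through this helper.  */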
2316 
2317 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318 {
2319     target_ulong pc = s->cs_base + eip;
2320 
2321     if (translator_use_goto_tb(&s->base, pc)) {
2322         /* jump to same page: we can use a direct jump */
2323         tcg_gen_goto_tb(tb_num);
2324         gen_jmp_im(s, eip);
2325         tcg_gen_exit_tb(s->base.tb, tb_num);
2326         s->base.is_jmp = DISAS_NORETURN;
2327     } else {
2328         /* jump to another page */
2329         gen_jmp_im(s, eip);
2330         gen_jr(s, s->tmp0);
2331     }
2332 }
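
/* Broadly, translator_use_goto_tb() only allows the direct-jump case when
   the destination lies on the same guest page as the start of the TB (and
   chaining is not otherwise suppressed), so cross-page jumps always take
   the indirect gen_jr() path.  */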
2333 
2334 static inline void gen_jcc(DisasContext *s, int b,
2335                            target_ulong val, target_ulong next_eip)
2336 {
2337     TCGLabel *l1, *l2;
2338 
2339     if (s->jmp_opt) {
2340         l1 = gen_new_label();
2341         gen_jcc1(s, b, l1);
2342 
2343         gen_goto_tb(s, 0, next_eip);
2344 
2345         gen_set_label(l1);
2346         gen_goto_tb(s, 1, val);
2347     } else {
2348         l1 = gen_new_label();
2349         l2 = gen_new_label();
2350         gen_jcc1(s, b, l1);
2351 
2352         gen_jmp_im(s, next_eip);
2353         tcg_gen_br(l2);
2354 
2355         gen_set_label(l1);
2356         gen_jmp_im(s, val);
2357         gen_set_label(l2);
2358         gen_eob(s);
2359     }
2360 }
2361 
2362 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                         int modrm, int reg)
2364 {
2365     CCPrepare cc;
2366 
2367     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368 
2369     cc = gen_prepare_cc(s, b, s->T1);
2370     if (cc.mask != -1) {
2371         TCGv t0 = tcg_temp_new();
2372         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373         cc.reg = t0;
2374     }
2375     if (!cc.use_reg2) {
2376         cc.reg2 = tcg_const_tl(cc.imm);
2377     }
2378 
2379     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                        s->T0, cpu_regs[reg]);
2381     gen_op_mov_reg_v(s, ot, reg, s->T0);
2382 
2383     if (cc.mask != -1) {
2384         tcg_temp_free(cc.reg);
2385     }
2386     if (!cc.use_reg2) {
2387         tcg_temp_free(cc.reg2);
2388     }
2389 }
2390 
2391 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392 {
2393     tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                      offsetof(CPUX86State,segs[seg_reg].selector));
2395 }
2396 
2397 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398 {
2399     tcg_gen_ext16u_tl(s->T0, s->T0);
2400     tcg_gen_st32_tl(s->T0, cpu_env,
2401                     offsetof(CPUX86State,segs[seg_reg].selector));
2402     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403 }
2404 
2405 /* Move T0 to seg_reg and compute whether the CPU state may change.
2406    Never call this function with seg_reg == R_CS. */
2407 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408 {
2409     if (PE(s) && !VM86(s)) {
2410         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412         /* Abort translation because the addseg value may change or
2413            because ss32 may change.  For R_SS, translation must always
2414            stop, because special handling is needed to inhibit hardware
2415            interrupts for the next instruction. */
2416         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417             s->base.is_jmp = DISAS_TOO_MANY;
2418         }
2419     } else {
2420         gen_op_movl_seg_T0_vm(s, seg_reg);
2421         if (seg_reg == R_SS) {
2422             s->base.is_jmp = DISAS_TOO_MANY;
2423         }
2424     }
2425 }
2426 
2427 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428 {
2429     /* no SVM activated; fast case */
2430     if (likely(!GUEST(s))) {
2431         return;
2432     }
2433     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434 }
2435 
2436 static inline void gen_stack_update(DisasContext *s, int addend)
2437 {
2438     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439 }
2440 
2441 /* Generate a push. It depends on ss32, addseg and dflag.  */
2442 static void gen_push_v(DisasContext *s, TCGv val)
2443 {
2444     MemOp d_ot = mo_pushpop(s, s->dflag);
2445     MemOp a_ot = mo_stacksize(s);
2446     int size = 1 << d_ot;
2447     TCGv new_esp = s->A0;
2448 
2449     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450 
2451     if (!CODE64(s)) {
2452         if (ADDSEG(s)) {
2453             new_esp = s->tmp4;
2454             tcg_gen_mov_tl(new_esp, s->A0);
2455         }
2456         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457     }
2458 
2459     gen_op_st_v(s, d_ot, val, s->A0);
2460     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461 }
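
/* Note the ordering above: the store may fault, so ESP is written back
   only after it succeeds; e.g. a push onto an unmapped stack page leaves
   ESP unchanged when the #PF is delivered.  */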
2462 
2463 /* A two-step pop is necessary for precise exceptions. */
2464 static MemOp gen_pop_T0(DisasContext *s)
2465 {
2466     MemOp d_ot = mo_pushpop(s, s->dflag);
2467 
2468     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470 
2471     return d_ot;
2472 }
2473 
2474 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475 {
2476     gen_stack_update(s, 1 << ot);
2477 }
2478 
2479 static inline void gen_stack_A0(DisasContext *s)
2480 {
2481     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482 }
2483 
2484 static void gen_pusha(DisasContext *s)
2485 {
2486     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487     MemOp d_ot = s->dflag;
2488     int size = 1 << d_ot;
2489     int i;
2490 
2491     for (i = 0; i < 8; i++) {
2492         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495     }
2496 
2497     gen_stack_update(s, -8 * size);
2498 }
2499 
2500 static void gen_popa(DisasContext *s)
2501 {
2502     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503     MemOp d_ot = s->dflag;
2504     int size = 1 << d_ot;
2505     int i;
2506 
2507     for (i = 0; i < 8; i++) {
2508         /* ESP is not reloaded */
2509         if (7 - i == R_ESP) {
2510             continue;
2511         }
2512         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516     }
2517 
2518     gen_stack_update(s, 8 * size);
2519 }
2520 
2521 static void gen_enter(DisasContext *s, int esp_addend, int level)
2522 {
2523     MemOp d_ot = mo_pushpop(s, s->dflag);
2524     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525     int size = 1 << d_ot;
2526 
2527     /* Push BP; compute FrameTemp into T1.  */
2528     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531 
2532     level &= 31;
2533     if (level != 0) {
2534         int i;
2535 
2536         /* Copy level-1 pointers from the previous frame.  */
2537         for (i = 1; i < level; ++i) {
2538             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541 
2542             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545         }
2546 
2547         /* Push the current FrameTemp as the last level.  */
2548         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550         gen_op_st_v(s, d_ot, s->T1, s->A0);
2551     }
2552 
2553     /* Copy the FrameTemp value to EBP.  */
2554     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555 
2556     /* Compute the final value of ESP.  */
2557     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559 }
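
/* Illustrative example: "enter $8, $0" pushes EBP, loads EBP with the
   address of that saved copy (FrameTemp), and lowers ESP by a further
   8 bytes; a non-zero level additionally copies level-1 saved frame
   pointers and pushes FrameTemp itself, as above.  */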
2560 
2561 static void gen_leave(DisasContext *s)
2562 {
2563     MemOp d_ot = mo_pushpop(s, s->dflag);
2564     MemOp a_ot = mo_stacksize(s);
2565 
2566     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568 
2569     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570 
2571     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573 }
2574 
2575 /* Similarly, except that the assumption here is that we don't decode
2576    the instruction at all -- either a missing opcode, an unimplemented
2577    feature, or just a bogus instruction stream.  */
2578 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579 {
2580     gen_illegal_opcode(s);
2581 
2582     if (qemu_loglevel_mask(LOG_UNIMP)) {
2583         FILE *logfile = qemu_log_lock();
2584         target_ulong pc = s->pc_start, end = s->pc;
2585 
2586         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2587         for (; pc < end; ++pc) {
2588             qemu_log(" %02x", cpu_ldub_code(env, pc));
2589         }
2590         qemu_log("\n");
2591         qemu_log_unlock(logfile);
2592     }
2593 }
2594 
2595 /* An interrupt is different from an exception because of the
2596    privilege checks. */
2597 static void gen_interrupt(DisasContext *s, int intno,
2598                           target_ulong cur_eip, target_ulong next_eip)
2599 {
2600     gen_update_cc_op(s);
2601     gen_jmp_im(s, cur_eip);
2602     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2603                                tcg_const_i32(next_eip - cur_eip));
2604     s->base.is_jmp = DISAS_NORETURN;
2605 }
2606 
2607 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2608 {
2609     if ((s->flags & mask) == 0) {
2610         TCGv_i32 t = tcg_temp_new_i32();
2611         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2612         tcg_gen_ori_i32(t, t, mask);
2613         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614         tcg_temp_free_i32(t);
2615         s->flags |= mask;
2616     }
2617 }
2618 
2619 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2620 {
2621     if (s->flags & mask) {
2622         TCGv_i32 t = tcg_temp_new_i32();
2623         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2624         tcg_gen_andi_i32(t, t, ~mask);
2625         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626         tcg_temp_free_i32(t);
2627         s->flags &= ~mask;
2628     }
2629 }
2630 
2631 /* Clear BND registers during legacy branches.  */
2632 static void gen_bnd_jmp(DisasContext *s)
2633 {
2634     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2635        and if the BNDREGs are known to be in use (non-zero) already.
2636        The helper itself will check BNDPRESERVE at runtime.  */
2637     if ((s->prefix & PREFIX_REPNZ) == 0
2638         && (s->flags & HF_MPX_EN_MASK) != 0
2639         && (s->flags & HF_MPX_IU_MASK) != 0) {
2640         gen_helper_bnd_jmp(cpu_env);
2641     }
2642 }
2643 
2644 /* Generate an end of block.  A trace exception is also generated if needed.
2645    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2646    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2647    S->TF.  This is used by the syscall/sysret insns.  */
2648 static void
2649 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2650 {
2651     gen_update_cc_op(s);
2652 
2653     /* If several instructions disable interrupts, only the first does it.  */
2654     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2655         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2656     } else {
2657         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2658     }
2659 
2660     if (s->base.tb->flags & HF_RF_MASK) {
2661         gen_helper_reset_rf(cpu_env);
2662     }
2663     if (recheck_tf) {
2664         gen_helper_rechecking_single_step(cpu_env);
2665         tcg_gen_exit_tb(NULL, 0);
2666     } else if (s->flags & HF_TF_MASK) {
2667         gen_helper_single_step(cpu_env);
2668     } else if (jr) {
2669         tcg_gen_lookup_and_goto_ptr();
2670     } else {
2671         tcg_gen_exit_tb(NULL, 0);
2672     }
2673     s->base.is_jmp = DISAS_NORETURN;
2674 }
2675 
2676 static inline void
2677 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2678 {
2679     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2680 }
2681 
2682 /* End of block.
2683    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2684 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2685 {
2686     gen_eob_worker(s, inhibit, false);
2687 }
2688 
2689 /* End of block, resetting the inhibit irq flag.  */
2690 static void gen_eob(DisasContext *s)
2691 {
2692     gen_eob_worker(s, false, false);
2693 }
2694 
2695 /* Jump to register */
2696 static void gen_jr(DisasContext *s, TCGv dest)
2697 {
2698     do_gen_eob_worker(s, false, false, true);
2699 }
2700 
2701 /* Generate a jump to eip.  No segment change may happen beforehand,
2702    because a direct call to the next block may occur. */
2703 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2704 {
2705     gen_update_cc_op(s);
2706     set_cc_op(s, CC_OP_DYNAMIC);
2707     if (s->jmp_opt) {
2708         gen_goto_tb(s, tb_num, eip);
2709     } else {
2710         gen_jmp_im(s, eip);
2711         gen_eob(s);
2712     }
2713 }
2714 
2715 static void gen_jmp(DisasContext *s, target_ulong eip)
2716 {
2717     gen_jmp_tb(s, eip, 0);
2718 }
2719 
2720 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2721 {
2722     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2723     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2724 }
2725 
2726 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2727 {
2728     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2729     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2730 }
2731 
2732 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2733 {
2734     int mem_index = s->mem_index;
2735     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2736     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2737     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2738     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2739     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2740 }
2741 
2742 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2743 {
2744     int mem_index = s->mem_index;
2745     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2746     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2747     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2748     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2749     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2750 }
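
/* The 128-bit loads and stores above are composed of two 64-bit accesses,
   so they are not single-copy atomic -- consistent with x86 making no
   atomicity guarantee for ordinary 16-byte SSE accesses.  */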
2751 
2752 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2753 {
2754     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2755     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2756     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2758 }
2759 
2760 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2761 {
2762     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2763     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2764 }
2765 
2766 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2767 {
2768     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2769     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2770 }
2771 
2772 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2773 {
2774     tcg_gen_movi_i64(s->tmp1_i64, 0);
2775     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2776 }
2777 
2778 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2779 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2780 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2781 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2782 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2783 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2784                                TCGv_i32 val);
2785 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2786 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2787                                TCGv val);
2788 
2789 #define SSE_SPECIAL ((void *)1)
2790 #define SSE_DUMMY ((void *)2)
2791 
2792 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2793 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2794                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
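
/* The four columns of sse_op_table1 below are selected by the mandatory
   prefix: 0 = none (MMX or ps), 1 = 0x66 (pd), 2 = 0xF3 (ss), 3 = 0xF2
   (sd); see the b1 computation in gen_sse().  E.g. 0xF3 0F 58 resolves
   to gen_helper_addss via SSE_FOP(add).  */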
2795 
2796 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2797     /* 3DNow! extensions */
2798     [0x0e] = { SSE_DUMMY }, /* femms */
2799     [0x0f] = { SSE_DUMMY }, /* pf... */
2800     /* pure SSE operations */
2801     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2802     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2803     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2804     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2805     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2806     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2807     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2808     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2809 
2810     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2811     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2812     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2813     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2814     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2815     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2816     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2817     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2818     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2819     [0x51] = SSE_FOP(sqrt),
2820     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2821     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2822     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2823     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2824     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2825     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2826     [0x58] = SSE_FOP(add),
2827     [0x59] = SSE_FOP(mul),
2828     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2829                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2830     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2831     [0x5c] = SSE_FOP(sub),
2832     [0x5d] = SSE_FOP(min),
2833     [0x5e] = SSE_FOP(div),
2834     [0x5f] = SSE_FOP(max),
2835 
2836     [0xc2] = SSE_FOP(cmpeq),
2837     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2838                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2839 
2840     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2841     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2842     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2843 
2844     /* MMX ops and their SSE extensions */
2845     [0x60] = MMX_OP2(punpcklbw),
2846     [0x61] = MMX_OP2(punpcklwd),
2847     [0x62] = MMX_OP2(punpckldq),
2848     [0x63] = MMX_OP2(packsswb),
2849     [0x64] = MMX_OP2(pcmpgtb),
2850     [0x65] = MMX_OP2(pcmpgtw),
2851     [0x66] = MMX_OP2(pcmpgtl),
2852     [0x67] = MMX_OP2(packuswb),
2853     [0x68] = MMX_OP2(punpckhbw),
2854     [0x69] = MMX_OP2(punpckhwd),
2855     [0x6a] = MMX_OP2(punpckhdq),
2856     [0x6b] = MMX_OP2(packssdw),
2857     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2858     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2859     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2860     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2861     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2862                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2863                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2864                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2865     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2866     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2867     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2868     [0x74] = MMX_OP2(pcmpeqb),
2869     [0x75] = MMX_OP2(pcmpeqw),
2870     [0x76] = MMX_OP2(pcmpeql),
2871     [0x77] = { SSE_DUMMY }, /* emms */
2872     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2873     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2874     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2875     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2876     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2877     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2878     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2879     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2880     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2881     [0xd1] = MMX_OP2(psrlw),
2882     [0xd2] = MMX_OP2(psrld),
2883     [0xd3] = MMX_OP2(psrlq),
2884     [0xd4] = MMX_OP2(paddq),
2885     [0xd5] = MMX_OP2(pmullw),
2886     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2887     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2888     [0xd8] = MMX_OP2(psubusb),
2889     [0xd9] = MMX_OP2(psubusw),
2890     [0xda] = MMX_OP2(pminub),
2891     [0xdb] = MMX_OP2(pand),
2892     [0xdc] = MMX_OP2(paddusb),
2893     [0xdd] = MMX_OP2(paddusw),
2894     [0xde] = MMX_OP2(pmaxub),
2895     [0xdf] = MMX_OP2(pandn),
2896     [0xe0] = MMX_OP2(pavgb),
2897     [0xe1] = MMX_OP2(psraw),
2898     [0xe2] = MMX_OP2(psrad),
2899     [0xe3] = MMX_OP2(pavgw),
2900     [0xe4] = MMX_OP2(pmulhuw),
2901     [0xe5] = MMX_OP2(pmulhw),
2902     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2903     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2904     [0xe8] = MMX_OP2(psubsb),
2905     [0xe9] = MMX_OP2(psubsw),
2906     [0xea] = MMX_OP2(pminsw),
2907     [0xeb] = MMX_OP2(por),
2908     [0xec] = MMX_OP2(paddsb),
2909     [0xed] = MMX_OP2(paddsw),
2910     [0xee] = MMX_OP2(pmaxsw),
2911     [0xef] = MMX_OP2(pxor),
2912     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2913     [0xf1] = MMX_OP2(psllw),
2914     [0xf2] = MMX_OP2(pslld),
2915     [0xf3] = MMX_OP2(psllq),
2916     [0xf4] = MMX_OP2(pmuludq),
2917     [0xf5] = MMX_OP2(pmaddwd),
2918     [0xf6] = MMX_OP2(psadbw),
2919     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2920                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2921     [0xf8] = MMX_OP2(psubb),
2922     [0xf9] = MMX_OP2(psubw),
2923     [0xfa] = MMX_OP2(psubl),
2924     [0xfb] = MMX_OP2(psubq),
2925     [0xfc] = MMX_OP2(paddb),
2926     [0xfd] = MMX_OP2(paddw),
2927     [0xfe] = MMX_OP2(paddl),
2928 };
2929 
2930 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2931     [0 + 2] = MMX_OP2(psrlw),
2932     [0 + 4] = MMX_OP2(psraw),
2933     [0 + 6] = MMX_OP2(psllw),
2934     [8 + 2] = MMX_OP2(psrld),
2935     [8 + 4] = MMX_OP2(psrad),
2936     [8 + 6] = MMX_OP2(pslld),
2937     [16 + 2] = MMX_OP2(psrlq),
2938     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2939     [16 + 6] = MMX_OP2(psllq),
2940     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2941 };
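
/* sse_op_table2 covers the shift-by-immediate groups: rows 0..7 belong to
   opcode 0x71 (word shifts), 8..15 to 0x72 (dword) and 16..23 to 0x73
   (qword), indexed by the modrm reg field; the second column is the xmm
   form, as used by the 0x71/0x72/0x73 handling in gen_sse().  */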
2942 
2943 static const SSEFunc_0_epi sse_op_table3ai[] = {
2944     gen_helper_cvtsi2ss,
2945     gen_helper_cvtsi2sd
2946 };
2947 
2948 #ifdef TARGET_X86_64
2949 static const SSEFunc_0_epl sse_op_table3aq[] = {
2950     gen_helper_cvtsq2ss,
2951     gen_helper_cvtsq2sd
2952 };
2953 #endif
2954 
2955 static const SSEFunc_i_ep sse_op_table3bi[] = {
2956     gen_helper_cvttss2si,
2957     gen_helper_cvtss2si,
2958     gen_helper_cvttsd2si,
2959     gen_helper_cvtsd2si
2960 };
2961 
2962 #ifdef TARGET_X86_64
2963 static const SSEFunc_l_ep sse_op_table3bq[] = {
2964     gen_helper_cvttss2sq,
2965     gen_helper_cvtss2sq,
2966     gen_helper_cvttsd2sq,
2967     gen_helper_cvtsd2sq
2968 };
2969 #endif
2970 
2971 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2972     SSE_FOP(cmpeq),
2973     SSE_FOP(cmplt),
2974     SSE_FOP(cmple),
2975     SSE_FOP(cmpunord),
2976     SSE_FOP(cmpneq),
2977     SSE_FOP(cmpnlt),
2978     SSE_FOP(cmpnle),
2979     SSE_FOP(cmpord),
2980 };
2981 
2982 static const SSEFunc_0_epp sse_op_table5[256] = {
2983     [0x0c] = gen_helper_pi2fw,
2984     [0x0d] = gen_helper_pi2fd,
2985     [0x1c] = gen_helper_pf2iw,
2986     [0x1d] = gen_helper_pf2id,
2987     [0x8a] = gen_helper_pfnacc,
2988     [0x8e] = gen_helper_pfpnacc,
2989     [0x90] = gen_helper_pfcmpge,
2990     [0x94] = gen_helper_pfmin,
2991     [0x96] = gen_helper_pfrcp,
2992     [0x97] = gen_helper_pfrsqrt,
2993     [0x9a] = gen_helper_pfsub,
2994     [0x9e] = gen_helper_pfadd,
2995     [0xa0] = gen_helper_pfcmpgt,
2996     [0xa4] = gen_helper_pfmax,
2997     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2998     [0xa7] = gen_helper_movq, /* pfrsqit1 */
2999     [0xaa] = gen_helper_pfsubr,
3000     [0xae] = gen_helper_pfacc,
3001     [0xb0] = gen_helper_pfcmpeq,
3002     [0xb4] = gen_helper_pfmul,
3003     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3004     [0xb7] = gen_helper_pmulhrw_mmx,
3005     [0xbb] = gen_helper_pswapd,
3006     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3007 };
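
/* sse_op_table5 is indexed by the trailing imm8 of a 3DNow! instruction
   (encoded 0F 0F /r imm8); e.g. a suffix of 0x9e dispatches to
   gen_helper_pfadd.  */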
3008 
3009 struct SSEOpHelper_epp {
3010     SSEFunc_0_epp op[2];
3011     uint32_t ext_mask;
3012 };
3013 
3014 struct SSEOpHelper_eppi {
3015     SSEFunc_0_eppi op[2];
3016     uint32_t ext_mask;
3017 };
3018 
3019 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3020 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3021 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3022 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3023 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3024         CPUID_EXT_PCLMULQDQ }
3025 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3026 
3027 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3028     [0x00] = SSSE3_OP(pshufb),
3029     [0x01] = SSSE3_OP(phaddw),
3030     [0x02] = SSSE3_OP(phaddd),
3031     [0x03] = SSSE3_OP(phaddsw),
3032     [0x04] = SSSE3_OP(pmaddubsw),
3033     [0x05] = SSSE3_OP(phsubw),
3034     [0x06] = SSSE3_OP(phsubd),
3035     [0x07] = SSSE3_OP(phsubsw),
3036     [0x08] = SSSE3_OP(psignb),
3037     [0x09] = SSSE3_OP(psignw),
3038     [0x0a] = SSSE3_OP(psignd),
3039     [0x0b] = SSSE3_OP(pmulhrsw),
3040     [0x10] = SSE41_OP(pblendvb),
3041     [0x14] = SSE41_OP(blendvps),
3042     [0x15] = SSE41_OP(blendvpd),
3043     [0x17] = SSE41_OP(ptest),
3044     [0x1c] = SSSE3_OP(pabsb),
3045     [0x1d] = SSSE3_OP(pabsw),
3046     [0x1e] = SSSE3_OP(pabsd),
3047     [0x20] = SSE41_OP(pmovsxbw),
3048     [0x21] = SSE41_OP(pmovsxbd),
3049     [0x22] = SSE41_OP(pmovsxbq),
3050     [0x23] = SSE41_OP(pmovsxwd),
3051     [0x24] = SSE41_OP(pmovsxwq),
3052     [0x25] = SSE41_OP(pmovsxdq),
3053     [0x28] = SSE41_OP(pmuldq),
3054     [0x29] = SSE41_OP(pcmpeqq),
3055     [0x2a] = SSE41_SPECIAL, /* movntdqa */
3056     [0x2b] = SSE41_OP(packusdw),
3057     [0x30] = SSE41_OP(pmovzxbw),
3058     [0x31] = SSE41_OP(pmovzxbd),
3059     [0x32] = SSE41_OP(pmovzxbq),
3060     [0x33] = SSE41_OP(pmovzxwd),
3061     [0x34] = SSE41_OP(pmovzxwq),
3062     [0x35] = SSE41_OP(pmovzxdq),
3063     [0x37] = SSE42_OP(pcmpgtq),
3064     [0x38] = SSE41_OP(pminsb),
3065     [0x39] = SSE41_OP(pminsd),
3066     [0x3a] = SSE41_OP(pminuw),
3067     [0x3b] = SSE41_OP(pminud),
3068     [0x3c] = SSE41_OP(pmaxsb),
3069     [0x3d] = SSE41_OP(pmaxsd),
3070     [0x3e] = SSE41_OP(pmaxuw),
3071     [0x3f] = SSE41_OP(pmaxud),
3072     [0x40] = SSE41_OP(pmulld),
3073     [0x41] = SSE41_OP(phminposuw),
3074     [0xdb] = AESNI_OP(aesimc),
3075     [0xdc] = AESNI_OP(aesenc),
3076     [0xdd] = AESNI_OP(aesenclast),
3077     [0xde] = AESNI_OP(aesdec),
3078     [0xdf] = AESNI_OP(aesdeclast),
3079 };
3080 
3081 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3082     [0x08] = SSE41_OP(roundps),
3083     [0x09] = SSE41_OP(roundpd),
3084     [0x0a] = SSE41_OP(roundss),
3085     [0x0b] = SSE41_OP(roundsd),
3086     [0x0c] = SSE41_OP(blendps),
3087     [0x0d] = SSE41_OP(blendpd),
3088     [0x0e] = SSE41_OP(pblendw),
3089     [0x0f] = SSSE3_OP(palignr),
3090     [0x14] = SSE41_SPECIAL, /* pextrb */
3091     [0x15] = SSE41_SPECIAL, /* pextrw */
3092     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3093     [0x17] = SSE41_SPECIAL, /* extractps */
3094     [0x20] = SSE41_SPECIAL, /* pinsrb */
3095     [0x21] = SSE41_SPECIAL, /* insertps */
3096     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3097     [0x40] = SSE41_OP(dpps),
3098     [0x41] = SSE41_OP(dppd),
3099     [0x42] = SSE41_OP(mpsadbw),
3100     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3101     [0x60] = SSE42_OP(pcmpestrm),
3102     [0x61] = SSE42_OP(pcmpestri),
3103     [0x62] = SSE42_OP(pcmpistrm),
3104     [0x63] = SSE42_OP(pcmpistri),
3105     [0xdf] = AESNI_OP(aeskeygenassist),
3106 };
3107 
3108 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3109                     target_ulong pc_start)
3110 {
3111     int b1, op1_offset, op2_offset, is_xmm, val;
3112     int modrm, mod, rm, reg;
3113     SSEFunc_0_epp sse_fn_epp;
3114     SSEFunc_0_eppi sse_fn_eppi;
3115     SSEFunc_0_ppi sse_fn_ppi;
3116     SSEFunc_0_eppt sse_fn_eppt;
3117     MemOp ot;
3118 
3119     b &= 0xff;
3120     if (s->prefix & PREFIX_DATA)
3121         b1 = 1;
3122     else if (s->prefix & PREFIX_REPZ)
3123         b1 = 2;
3124     else if (s->prefix & PREFIX_REPNZ)
3125         b1 = 3;
3126     else
3127         b1 = 0;
3128     sse_fn_epp = sse_op_table1[b][b1];
3129     if (!sse_fn_epp) {
3130         goto unknown_op;
3131     }
3132     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3133         is_xmm = 1;
3134     } else {
3135         if (b1 == 0) {
3136             /* MMX case */
3137             is_xmm = 0;
3138         } else {
3139             is_xmm = 1;
3140         }
3141     }
3142     /* simple MMX/SSE operation */
3143     if (s->flags & HF_TS_MASK) {
3144         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3145         return;
3146     }
3147     if (s->flags & HF_EM_MASK) {
3148     illegal_op:
3149         gen_illegal_opcode(s);
3150         return;
3151     }
3152     if (is_xmm
3153         && !(s->flags & HF_OSFXSR_MASK)
3154         && (b != 0x38 && b != 0x3a)) {
3155         goto unknown_op;
3156     }
3157     if (b == 0x0e) {
3158         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3159             /* If we were fully decoding this we might use illegal_op.  */
3160             goto unknown_op;
3161         }
3162         /* femms */
3163         gen_helper_emms(cpu_env);
3164         return;
3165     }
3166     if (b == 0x77) {
3167         /* emms */
3168         gen_helper_emms(cpu_env);
3169         return;
3170     }
3171     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3172        the static cpu state) */
3173     if (!is_xmm) {
3174         gen_helper_enter_mmx(cpu_env);
3175     }
3176 
3177     modrm = x86_ldub_code(env, s);
3178     reg = ((modrm >> 3) & 7);
3179     if (is_xmm) {
3180         reg |= REX_R(s);
3181     }
3182     mod = (modrm >> 6) & 3;
3183     if (sse_fn_epp == SSE_SPECIAL) {
3184         b |= (b1 << 8);
3185         switch(b) {
3186         case 0x0e7: /* movntq */
3187             if (mod == 3) {
3188                 goto illegal_op;
3189             }
3190             gen_lea_modrm(env, s, modrm);
3191             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3192             break;
3193         case 0x1e7: /* movntdq */
3194         case 0x02b: /* movntps */
3195         case 0x12b: /* movntpd */
3196             if (mod == 3)
3197                 goto illegal_op;
3198             gen_lea_modrm(env, s, modrm);
3199             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3200             break;
3201         case 0x3f0: /* lddqu */
3202             if (mod == 3)
3203                 goto illegal_op;
3204             gen_lea_modrm(env, s, modrm);
3205             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3206             break;
3207         case 0x22b: /* movntss */
3208         case 0x32b: /* movntsd */
3209             if (mod == 3)
3210                 goto illegal_op;
3211             gen_lea_modrm(env, s, modrm);
3212             if (b1 & 1) {
3213                 gen_stq_env_A0(s, offsetof(CPUX86State,
3214                                            xmm_regs[reg].ZMM_Q(0)));
3215             } else {
3216                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3217                     xmm_regs[reg].ZMM_L(0)));
3218                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3219             }
3220             break;
3221         case 0x6e: /* movd mm, ea */
3222 #ifdef TARGET_X86_64
3223             if (s->dflag == MO_64) {
3224                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3225                 tcg_gen_st_tl(s->T0, cpu_env,
3226                               offsetof(CPUX86State, fpregs[reg].mmx));
3227             } else
3228 #endif
3229             {
3230                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3231                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3232                                  offsetof(CPUX86State,fpregs[reg].mmx));
3233                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3234                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3235             }
3236             break;
3237         case 0x16e: /* movd xmm, ea */
3238 #ifdef TARGET_X86_64
3239             if (s->dflag == MO_64) {
3240                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3241                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3242                                  offsetof(CPUX86State,xmm_regs[reg]));
3243                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3244             } else
3245 #endif
3246             {
3247                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3248                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3249                                  offsetof(CPUX86State,xmm_regs[reg]));
3250                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3251                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3252             }
3253             break;
3254         case 0x6f: /* movq mm, ea */
3255             if (mod != 3) {
3256                 gen_lea_modrm(env, s, modrm);
3257                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3258             } else {
3259                 rm = (modrm & 7);
3260                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3261                                offsetof(CPUX86State,fpregs[rm].mmx));
3262                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3263                                offsetof(CPUX86State,fpregs[reg].mmx));
3264             }
3265             break;
3266         case 0x010: /* movups */
3267         case 0x110: /* movupd */
3268         case 0x028: /* movaps */
3269         case 0x128: /* movapd */
3270         case 0x16f: /* movdqa xmm, ea */
3271         case 0x26f: /* movdqu xmm, ea */
3272             if (mod != 3) {
3273                 gen_lea_modrm(env, s, modrm);
3274                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3275             } else {
3276                 rm = (modrm & 7) | REX_B(s);
3277                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3278                             offsetof(CPUX86State,xmm_regs[rm]));
3279             }
3280             break;
3281         case 0x210: /* movss xmm, ea */
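                 /* Loading movss from memory zeroes the upper 96 bits of
                    the destination; the register-to-register form instead
                    preserves them and copies only the low dword.  */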
3282             if (mod != 3) {
3283                 gen_lea_modrm(env, s, modrm);
3284                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3285                 tcg_gen_st32_tl(s->T0, cpu_env,
3286                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3287                 tcg_gen_movi_tl(s->T0, 0);
3288                 tcg_gen_st32_tl(s->T0, cpu_env,
3289                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3290                 tcg_gen_st32_tl(s->T0, cpu_env,
3291                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3292                 tcg_gen_st32_tl(s->T0, cpu_env,
3293                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3294             } else {
3295                 rm = (modrm & 7) | REX_B(s);
3296                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3297                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3298             }
3299             break;
3300         case 0x310: /* movsd xmm, ea */
3301             if (mod != 3) {
3302                 gen_lea_modrm(env, s, modrm);
3303                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3304                                            xmm_regs[reg].ZMM_Q(0)));
3305                 tcg_gen_movi_tl(s->T0, 0);
3306                 tcg_gen_st32_tl(s->T0, cpu_env,
3307                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3308                 tcg_gen_st32_tl(s->T0, cpu_env,
3309                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3310             } else {
3311                 rm = (modrm & 7) | REX_B(s);
3312                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3313                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3314             }
3315             break;
3316         case 0x012: /* movlps */
3317         case 0x112: /* movlpd */
3318             if (mod != 3) {
3319                 gen_lea_modrm(env, s, modrm);
3320                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3321                                            xmm_regs[reg].ZMM_Q(0)));
3322             } else {
3323                 /* movhlps */
3324                 rm = (modrm & 7) | REX_B(s);
3325                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3326                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3327             }
3328             break;
3329         case 0x212: /* movsldup */
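                 /* Duplicate the even 32-bit lanes: the result is
                    {L0, L0, L2, L2}.  The common tail below handles both
                    the memory and register source forms.  */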
3330             if (mod != 3) {
3331                 gen_lea_modrm(env, s, modrm);
3332                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3333             } else {
3334                 rm = (modrm & 7) | REX_B(s);
3335                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3336                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3337                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3338                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3339             }
3340             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3341                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3342             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3343                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3344             break;
3345         case 0x312: /* movddup */
3346             if (mod != 3) {
3347                 gen_lea_modrm(env, s, modrm);
3348                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3349                                            xmm_regs[reg].ZMM_Q(0)));
3350             } else {
3351                 rm = (modrm & 7) | REX_B(s);
3352                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3353                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3354             }
3355             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3356                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3357             break;
3358         case 0x016: /* movhps */
3359         case 0x116: /* movhpd */
3360             if (mod != 3) {
3361                 gen_lea_modrm(env, s, modrm);
3362                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3363                                            xmm_regs[reg].ZMM_Q(1)));
3364             } else {
3365                 /* movlhps */
3366                 rm = (modrm & 7) | REX_B(s);
3367                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3368                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3369             }
3370             break;
3371         case 0x216: /* movshdup */
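                 /* Duplicate the odd 32-bit lanes: the result is
                    {L1, L1, L3, L3}.  */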
3372             if (mod != 3) {
3373                 gen_lea_modrm(env, s, modrm);
3374                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3375             } else {
3376                 rm = (modrm & 7) | REX_B(s);
3377                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3378                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3379                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3380                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3381             }
3382             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3383                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3384             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3385                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3386             break;
3387         case 0x178:
3388         case 0x378:
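                 /* SSE4a extrq (66 0f 78 /0) and insertq (f2 0f 78) with
                    two immediate bytes: field length, then bit index.  */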
3389             {
3390                 int bit_index, field_length;
3391 
3392                 if (b1 == 1 && reg != 0)
3393                     goto illegal_op;
3394                 field_length = x86_ldub_code(env, s) & 0x3F;
3395                 bit_index = x86_ldub_code(env, s) & 0x3F;
3396                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3397                     offsetof(CPUX86State,xmm_regs[reg]));
3398                 if (b1 == 1)
3399                     gen_helper_extrq_i(cpu_env, s->ptr0,
3400                                        tcg_const_i32(bit_index),
3401                                        tcg_const_i32(field_length));
3402                 else
3403                     gen_helper_insertq_i(cpu_env, s->ptr0,
3404                                          tcg_const_i32(bit_index),
3405                                          tcg_const_i32(field_length));
3406             }
3407             break;
3408         case 0x7e: /* movd ea, mm */
3409 #ifdef TARGET_X86_64
3410             if (s->dflag == MO_64) {
3411                 tcg_gen_ld_i64(s->T0, cpu_env,
3412                                offsetof(CPUX86State,fpregs[reg].mmx));
3413                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3414             } else
3415 #endif
3416             {
3417                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3418                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3419                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3420             }
3421             break;
3422         case 0x17e: /* movd ea, xmm */
3423 #ifdef TARGET_X86_64
3424             if (s->dflag == MO_64) {
3425                 tcg_gen_ld_i64(s->T0, cpu_env,
3426                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3427                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3428             } else
3429 #endif
3430             {
3431                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3432                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3433                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3434             }
3435             break;
3436         case 0x27e: /* movq xmm, ea */
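                 /* f3 0f 7e always zeroes the high quadword of the
                    destination, in both the memory and register forms.  */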
3437             if (mod != 3) {
3438                 gen_lea_modrm(env, s, modrm);
3439                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3440                                            xmm_regs[reg].ZMM_Q(0)));
3441             } else {
3442                 rm = (modrm & 7) | REX_B(s);
3443                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3444                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3445             }
3446             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3447             break;
3448         case 0x7f: /* movq ea, mm */
3449             if (mod != 3) {
3450                 gen_lea_modrm(env, s, modrm);
3451                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3452             } else {
3453                 rm = (modrm & 7);
3454                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3455                             offsetof(CPUX86State,fpregs[reg].mmx));
3456             }
3457             break;
3458         case 0x011: /* movups */
3459         case 0x111: /* movupd */
3460         case 0x029: /* movaps */
3461         case 0x129: /* movapd */
3462         case 0x17f: /* movdqa ea, xmm */
3463         case 0x27f: /* movdqu ea, xmm */
3464             if (mod != 3) {
3465                 gen_lea_modrm(env, s, modrm);
3466                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3467             } else {
3468                 rm = (modrm & 7) | REX_B(s);
3469                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3470                             offsetof(CPUX86State,xmm_regs[reg]));
3471             }
3472             break;
3473         case 0x211: /* movss ea, xmm */
3474             if (mod != 3) {
3475                 gen_lea_modrm(env, s, modrm);
3476                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3477                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3478                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3479             } else {
3480                 rm = (modrm & 7) | REX_B(s);
3481                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3482                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3483             }
3484             break;
3485         case 0x311: /* movsd ea, xmm */
3486             if (mod != 3) {
3487                 gen_lea_modrm(env, s, modrm);
3488                 gen_stq_env_A0(s, offsetof(CPUX86State,
3489                                            xmm_regs[reg].ZMM_Q(0)));
3490             } else {
3491                 rm = (modrm & 7) | REX_B(s);
3492                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3493                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3494             }
3495             break;
3496         case 0x013: /* movlps */
3497         case 0x113: /* movlpd */
3498             if (mod != 3) {
3499                 gen_lea_modrm(env, s, modrm);
3500                 gen_stq_env_A0(s, offsetof(CPUX86State,
3501                                            xmm_regs[reg].ZMM_Q(0)));
3502             } else {
3503                 goto illegal_op;
3504             }
3505             break;
3506         case 0x017: /* movhps */
3507         case 0x117: /* movhpd */
3508             if (mod != 3) {
3509                 gen_lea_modrm(env, s, modrm);
3510                 gen_stq_env_A0(s, offsetof(CPUX86State,
3511                                            xmm_regs[reg].ZMM_Q(1)));
3512             } else {
3513                 goto illegal_op;
3514             }
3515             break;
3516         case 0x71: /* shift mm, im */
3517         case 0x72:
3518         case 0x73:
3519         case 0x171: /* shift xmm, im */
3520         case 0x172:
3521         case 0x173:
3522             if (b1 >= 2) {
3523                 goto unknown_op;
3524             }
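                 /* Materialize the immediate shift count in the mmx_t0 or
                    xmm_t0 scratch slot, so that the ordinary two-operand
                    shift helpers can be reused with the count as their
                    source operand.  */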
3525             val = x86_ldub_code(env, s);
3526             if (is_xmm) {
3527                 tcg_gen_movi_tl(s->T0, val);
3528                 tcg_gen_st32_tl(s->T0, cpu_env,
3529                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3530                 tcg_gen_movi_tl(s->T0, 0);
3531                 tcg_gen_st32_tl(s->T0, cpu_env,
3532                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3533                 op1_offset = offsetof(CPUX86State,xmm_t0);
3534             } else {
3535                 tcg_gen_movi_tl(s->T0, val);
3536                 tcg_gen_st32_tl(s->T0, cpu_env,
3537                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3538                 tcg_gen_movi_tl(s->T0, 0);
3539                 tcg_gen_st32_tl(s->T0, cpu_env,
3540                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3541                 op1_offset = offsetof(CPUX86State,mmx_t0);
3542             }
3543             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3544                                        ((modrm >> 3) & 7)][b1];
3545             if (!sse_fn_epp) {
3546                 goto unknown_op;
3547             }
3548             if (is_xmm) {
3549                 rm = (modrm & 7) | REX_B(s);
3550                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3551             } else {
3552                 rm = (modrm & 7);
3553                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3554             }
3555             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3556             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3557             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3558             break;
3559         case 0x050: /* movmskps */
3560             rm = (modrm & 7) | REX_B(s);
3561             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3562                              offsetof(CPUX86State,xmm_regs[rm]));
3563             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3564             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3565             break;
3566         case 0x150: /* movmskpd */
3567             rm = (modrm & 7) | REX_B(s);
3568             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3569                              offsetof(CPUX86State,xmm_regs[rm]));
3570             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3571             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3572             break;
3573         case 0x02a: /* cvtpi2ps */
3574         case 0x12a: /* cvtpi2pd */
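                 /* The source is an MMX register, so put the FPU into MMX
                    mode (reset TOP, mark all tags valid) first.  */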
3575             gen_helper_enter_mmx(cpu_env);
3576             if (mod != 3) {
3577                 gen_lea_modrm(env, s, modrm);
3578                 op2_offset = offsetof(CPUX86State,mmx_t0);
3579                 gen_ldq_env_A0(s, op2_offset);
3580             } else {
3581                 rm = (modrm & 7);
3582                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3583             }
3584             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3585             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3586             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3587             switch(b >> 8) {
3588             case 0x0:
3589                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3590                 break;
3591             default:
3592             case 0x1:
3593                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3594                 break;
3595             }
3596             break;
3597         case 0x22a: /* cvtsi2ss */
3598         case 0x32a: /* cvtsi2sd */
3599             ot = mo_64_32(s->dflag);
3600             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3601             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3602             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3603             if (ot == MO_32) {
3604                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3605                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3606                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3607             } else {
3608 #ifdef TARGET_X86_64
3609                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3610                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3611 #else
3612                 goto illegal_op;
3613 #endif
3614             }
3615             break;
3616         case 0x02c: /* cvttps2pi */
3617         case 0x12c: /* cvttpd2pi */
3618         case 0x02d: /* cvtps2pi */
3619         case 0x12d: /* cvtpd2pi */
3620             gen_helper_enter_mmx(cpu_env);
3621             if (mod != 3) {
3622                 gen_lea_modrm(env, s, modrm);
3623                 op2_offset = offsetof(CPUX86State,xmm_t0);
3624                 gen_ldo_env_A0(s, op2_offset);
3625             } else {
3626                 rm = (modrm & 7) | REX_B(s);
3627                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3628             }
3629             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3630             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3631             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3632             switch(b) {
3633             case 0x02c:
3634                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3635                 break;
3636             case 0x12c:
3637                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3638                 break;
3639             case 0x02d:
3640                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3641                 break;
3642             case 0x12d:
3643                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3644                 break;
3645             }
3646             break;
3647         case 0x22c: /* cvttss2si */
3648         case 0x32c: /* cvttsd2si */
3649         case 0x22d: /* cvtss2si */
3650         case 0x32d: /* cvtsd2si */
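                 /* Bit 8 of b distinguishes the f3 (32-bit ss) and f2
                    (64-bit sd) forms; the memory case loads only the
                    scalar, not the full vector.  */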
3651             ot = mo_64_32(s->dflag);
3652             if (mod != 3) {
3653                 gen_lea_modrm(env, s, modrm);
3654                 if ((b >> 8) & 1) {
3655                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3656                 } else {
3657                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3658                     tcg_gen_st32_tl(s->T0, cpu_env,
3659                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3660                 }
3661                 op2_offset = offsetof(CPUX86State,xmm_t0);
3662             } else {
3663                 rm = (modrm & 7) | REX_B(s);
3664                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3665             }
3666             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3667             if (ot == MO_32) {
3668                 SSEFunc_i_ep sse_fn_i_ep =
3669                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3670                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3671                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3672             } else {
3673 #ifdef TARGET_X86_64
3674                 SSEFunc_l_ep sse_fn_l_ep =
3675                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3676                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3677 #else
3678                 goto illegal_op;
3679 #endif
3680             }
3681             gen_op_mov_reg_v(s, ot, reg, s->T0);
3682             break;
3683         case 0xc4: /* pinsrw */
3684         case 0x1c4:
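                 /* One immediate byte follows the modrm bytes; rip_offset
                    accounts for it when forming RIP-relative addresses.  */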
3685             s->rip_offset = 1;
3686             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3687             val = x86_ldub_code(env, s);
3688             if (b1) {
3689                 val &= 7;
3690                 tcg_gen_st16_tl(s->T0, cpu_env,
3691                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3692             } else {
3693                 val &= 3;
3694                 tcg_gen_st16_tl(s->T0, cpu_env,
3695                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3696             }
3697             break;
3698         case 0xc5: /* pextrw */
3699         case 0x1c5:
3700             if (mod != 3)
3701                 goto illegal_op;
3702             ot = mo_64_32(s->dflag);
3703             val = x86_ldub_code(env, s);
3704             if (b1) {
3705                 val &= 7;
3706                 rm = (modrm & 7) | REX_B(s);
3707                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3708                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3709             } else {
3710                 val &= 3;
3711                 rm = (modrm & 7);
3712                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3713                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3714             }
3715             reg = ((modrm >> 3) & 7) | REX_R(s);
3716             gen_op_mov_reg_v(s, ot, reg, s->T0);
3717             break;
3718         case 0x1d6: /* movq ea, xmm */
3719             if (mod != 3) {
3720                 gen_lea_modrm(env, s, modrm);
3721                 gen_stq_env_A0(s, offsetof(CPUX86State,
3722                                            xmm_regs[reg].ZMM_Q(0)));
3723             } else {
3724                 rm = (modrm & 7) | REX_B(s);
3725                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3726                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3727                 gen_op_movq_env_0(s,
3728                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3729             }
3730             break;
3731         case 0x2d6: /* movq2dq */
3732             gen_helper_enter_mmx(cpu_env);
3733             rm = (modrm & 7);
3734             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3735                         offsetof(CPUX86State,fpregs[rm].mmx));
3736             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3737             break;
3738         case 0x3d6: /* movdq2q */
3739             gen_helper_enter_mmx(cpu_env);
3740             rm = (modrm & 7) | REX_B(s);
3741             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3742                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3743             break;
3744         case 0xd7: /* pmovmskb */
3745         case 0x1d7:
3746             if (mod != 3)
3747                 goto illegal_op;
3748             if (b1) {
3749                 rm = (modrm & 7) | REX_B(s);
3750                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3751                                  offsetof(CPUX86State, xmm_regs[rm]));
3752                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3753             } else {
3754                 rm = (modrm & 7);
3755                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3756                                  offsetof(CPUX86State, fpregs[rm].mmx));
3757                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3758             }
3759             reg = ((modrm >> 3) & 7) | REX_R(s);
3760             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3761             break;
3762 
3763         case 0x138:
3764         case 0x038:
3765             b = modrm;
3766             if ((b & 0xf0) == 0xf0) {
3767                 goto do_0f_38_fx;
3768             }
3769             modrm = x86_ldub_code(env, s);
3770             rm = modrm & 7;
3771             reg = ((modrm >> 3) & 7) | REX_R(s);
3772             mod = (modrm >> 6) & 3;
3773             if (b1 >= 2) {
3774                 goto unknown_op;
3775             }
3776 
3777             sse_fn_epp = sse_op_table6[b].op[b1];
3778             if (!sse_fn_epp) {
3779                 goto unknown_op;
3780             }
3781             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3782                 goto illegal_op;
3783 
3784             if (b1) {
3785                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3786                 if (mod == 3) {
3787                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3788                 } else {
3789                     op2_offset = offsetof(CPUX86State,xmm_t0);
3790                     gen_lea_modrm(env, s, modrm);
3791                     switch (b) {
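                         /* pmovsx/pmovzx read only as many source bytes
                            as the widened result consumes (8, 4 or 2), so
                            load just that much rather than fault on bytes
                            past the architectural operand.  */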
3792                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3793                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3794                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3795                         gen_ldq_env_A0(s, op2_offset +
3796                                         offsetof(ZMMReg, ZMM_Q(0)));
3797                         break;
3798                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3799                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3800                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3801                                             s->mem_index, MO_LEUL);
3802                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3803                                         offsetof(ZMMReg, ZMM_L(0)));
3804                         break;
3805                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3806                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3807                                            s->mem_index, MO_LEUW);
3808                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3809                                         offsetof(ZMMReg, ZMM_W(0)));
3810                         break;
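                         /* movntdqa is a (non-temporal) load: fill the
                            destination register directly and skip the
                            generic helper call.  */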
3811                     case 0x2a:            /* movntdqa */
3812                         gen_ldo_env_A0(s, op1_offset);
3813                         return;
3814                     default:
3815                         gen_ldo_env_A0(s, op2_offset);
3816                     }
3817                 }
3818             } else {
3819                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3820                 if (mod == 3) {
3821                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3822                 } else {
3823                     op2_offset = offsetof(CPUX86State,mmx_t0);
3824                     gen_lea_modrm(env, s, modrm);
3825                     gen_ldq_env_A0(s, op2_offset);
3826                 }
3827             }
3828             if (sse_fn_epp == SSE_SPECIAL) {
3829                 goto unknown_op;
3830             }
3831 
3832             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3833             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3834             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3835 
3836             if (b == 0x17) {
3837                 set_cc_op(s, CC_OP_EFLAGS);
3838             }
3839             break;
3840 
3841         case 0x238:
3842         case 0x338:
3843         do_0f_38_fx:
3844             /* Various integer extensions at 0f 38 f[0-f].  */
3845             b = modrm | (b1 << 8);
3846             modrm = x86_ldub_code(env, s);
3847             reg = ((modrm >> 3) & 7) | REX_R(s);
3848 
3849             switch (b) {
3850             case 0x3f0: /* crc32 Gd,Eb */
3851             case 0x3f1: /* crc32 Gd,Ey */
3852             do_crc32:
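                     /* Accumulate the CRC-32C (Castagnoli polynomial) of
                        an 8 << ot bit operand into the low 32 bits of the
                        destination.  */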
3853                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3854                     goto illegal_op;
3855                 }
3856                 if ((b & 0xff) == 0xf0) {
3857                     ot = MO_8;
3858                 } else if (s->dflag != MO_64) {
3859                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3860                 } else {
3861                     ot = MO_64;
3862                 }
3863 
3864                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3865                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3866                 gen_helper_crc32(s->T0, s->tmp2_i32,
3867                                  s->T0, tcg_const_i32(8 << ot));
3868 
3869                 ot = mo_64_32(s->dflag);
3870                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3871                 break;
3872 
3873             case 0x1f0: /* crc32 or movbe */
3874             case 0x1f1:
3875                 /* For these insns the f3 prefix is supposed to take
3876                    priority over the 66 prefix, but the computation of b1
3877                    above does not honour that, so check for it here.  */
3878                 if (s->prefix & PREFIX_REPNZ) {
3879                     goto do_crc32;
3880                 }
3881                 /* FALLTHRU */
3882             case 0x0f0: /* movbe Gy,My */
3883             case 0x0f1: /* movbe My,Gy */
3884                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3885                     goto illegal_op;
3886                 }
3887                 if (s->dflag != MO_64) {
3888                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3889                 } else {
3890                     ot = MO_64;
3891                 }
3892 
3893                 gen_lea_modrm(env, s, modrm);
3894                 if ((b & 1) == 0) {
3895                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3896                                        s->mem_index, ot | MO_BE);
3897                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3898                 } else {
3899                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3900                                        s->mem_index, ot | MO_BE);
3901                 }
3902                 break;
3903 
3904             case 0x0f2: /* andn Gy, By, Ey */
3905                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3906                     || !(s->prefix & PREFIX_VEX)
3907                     || s->vex_l != 0) {
3908                     goto illegal_op;
3909                 }
3910                 ot = mo_64_32(s->dflag);
3911                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3912                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3913                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3914                 gen_op_update1_cc(s);
3915                 set_cc_op(s, CC_OP_LOGICB + ot);
3916                 break;
3917 
3918             case 0x0f7: /* bextr Gy, Ey, By */
3919                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3920                     || !(s->prefix & PREFIX_VEX)
3921                     || s->vex_l != 0) {
3922                     goto illegal_op;
3923                 }
3924                 ot = mo_64_32(s->dflag);
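                     /* By holds START in bits 0-7 and LEN in bits 8-15;
                        e.g. By = 0x0804 extracts bits 4..11 of Ey.  */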
3925                 {
3926                     TCGv bound, zero;
3927 
3928                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3929                     /* Extract START, and shift the operand.
3930                        Shifts larger than operand size get zeros.  */
3931                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3932                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3933 
3934                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3935                     zero = tcg_const_tl(0);
3936                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3937                                        s->T0, zero);
3938                     tcg_temp_free(zero);
3939 
3940                     /* Extract the LEN into a mask.  Lengths larger than
3941                        operand size get all ones.  */
3942                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3943                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3944                                        s->A0, bound);
3945                     tcg_temp_free(bound);
3946                     tcg_gen_movi_tl(s->T1, 1);
3947                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3948                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3949                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3950 
3951                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3952                     gen_op_update1_cc(s);
3953                     set_cc_op(s, CC_OP_LOGICB + ot);
3954                 }
3955                 break;
3956 
3957             case 0x0f5: /* bzhi Gy, Ey, By */
3958                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3959                     || !(s->prefix & PREFIX_VEX)
3960                     || s->vex_l != 0) {
3961                     goto illegal_op;
3962                 }
3963                 ot = mo_64_32(s->dflag);
3964                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3965                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3966                 {
3967                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3968                     /* Note that since we're using BMILG (in order to get O
3969                        cleared) we need to store the inverse into C.  */
3970                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3971                                        s->T1, bound);
3972                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3973                                        bound, bound, s->T1);
3974                     tcg_temp_free(bound);
3975                 }
3976                 tcg_gen_movi_tl(s->A0, -1);
3977                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3978                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3979                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3980                 gen_op_update1_cc(s);
3981                 set_cc_op(s, CC_OP_BMILGB + ot);
3982                 break;
3983 
3984             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3985                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3986                     || !(s->prefix & PREFIX_VEX)
3987                     || s->vex_l != 0) {
3988                     goto illegal_op;
3989                 }
3990                 ot = mo_64_32(s->dflag);
3991                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3992                 switch (ot) {
3993                 default:
3994                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3995                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3996                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3997                                       s->tmp2_i32, s->tmp3_i32);
3998                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3999                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4000                     break;
4001 #ifdef TARGET_X86_64
4002                 case MO_64:
4003                     tcg_gen_mulu2_i64(s->T0, s->T1,
4004                                       s->T0, cpu_regs[R_EDX]);
4005                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4006                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4007                     break;
4008 #endif
4009                 }
4010                 break;
4011 
4012             case 0x3f5: /* pdep Gy, By, Ey */
4013                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4014                     || !(s->prefix & PREFIX_VEX)
4015                     || s->vex_l != 0) {
4016                     goto illegal_op;
4017                 }
4018                 ot = mo_64_32(s->dflag);
4019                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4020                 /* Note that by zero-extending the source operand, we
4021                    automatically handle zero-extending the result.  */
4022                 if (ot == MO_64) {
4023                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4024                 } else {
4025                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4026                 }
4027                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4028                 break;
4029 
4030             case 0x2f5: /* pext Gy, By, Ey */
4031                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4032                     || !(s->prefix & PREFIX_VEX)
4033                     || s->vex_l != 0) {
4034                     goto illegal_op;
4035                 }
4036                 ot = mo_64_32(s->dflag);
4037                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4038                 /* Note that by zero-extending the source operand, we
4039                    automatically handle zero-extending the result.  */
4040                 if (ot == MO_64) {
4041                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4042                 } else {
4043                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4044                 }
4045                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4046                 break;
4047 
4048             case 0x1f6: /* adcx Gy, Ey */
4049             case 0x2f6: /* adox Gy, Ey */
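                     /* adcx updates only CF and adox only OF, so two
                        independent carry chains can be interleaved; the
                        CC_OP bookkeeping below tracks which of the two
                        flags currently holds a live carry.  */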
4050                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4051                     goto illegal_op;
4052                 } else {
4053                     TCGv carry_in, carry_out, zero;
4054                     int end_op;
4055 
4056                     ot = mo_64_32(s->dflag);
4057                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4058 
4059                     /* Re-use the carry-out from a previous round.  */
4060                     carry_in = NULL;
4061                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4062                     switch (s->cc_op) {
4063                     case CC_OP_ADCX:
4064                         if (b == 0x1f6) {
4065                             carry_in = cpu_cc_dst;
4066                             end_op = CC_OP_ADCX;
4067                         } else {
4068                             end_op = CC_OP_ADCOX;
4069                         }
4070                         break;
4071                     case CC_OP_ADOX:
4072                         if (b == 0x1f6) {
4073                             end_op = CC_OP_ADCOX;
4074                         } else {
4075                             carry_in = cpu_cc_src2;
4076                             end_op = CC_OP_ADOX;
4077                         }
4078                         break;
4079                     case CC_OP_ADCOX:
4080                         end_op = CC_OP_ADCOX;
4081                         carry_in = carry_out;
4082                         break;
4083                     default:
4084                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4085                         break;
4086                     }
4087                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4088                     if (!carry_in) {
4089                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4090                             gen_compute_eflags(s);
4091                         }
4092                         carry_in = s->tmp0;
4093                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4094                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4095                     }
4096 
4097                     switch (ot) {
4098 #ifdef TARGET_X86_64
4099                     case MO_32:
4100                         /* If we know TL is 64-bit, and we want a 32-bit
4101                            result, just do everything in 64-bit arithmetic.  */
4102                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4103                         tcg_gen_ext32u_i64(s->T0, s->T0);
4104                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4105                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4106                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4107                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4108                         break;
4109 #endif
4110                     default:
4111                         /* Otherwise compute the carry-out in two steps.  */
4112                         zero = tcg_const_tl(0);
4113                         tcg_gen_add2_tl(s->T0, carry_out,
4114                                         s->T0, zero,
4115                                         carry_in, zero);
4116                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4117                                         cpu_regs[reg], carry_out,
4118                                         s->T0, zero);
4119                         tcg_temp_free(zero);
4120                         break;
4121                     }
4122                     set_cc_op(s, end_op);
4123                 }
4124                 break;
4125 
4126             case 0x1f7: /* shlx Gy, Ey, By */
4127             case 0x2f7: /* sarx Gy, Ey, By */
4128             case 0x3f7: /* shrx Gy, Ey, By */
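                     /* The BMI2 shifts take their count from By, masked
                        to the operand width, and leave the flags alone,
                        so no cc_op update is needed here.  */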
4129                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4130                     || !(s->prefix & PREFIX_VEX)
4131                     || s->vex_l != 0) {
4132                     goto illegal_op;
4133                 }
4134                 ot = mo_64_32(s->dflag);
4135                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4136                 if (ot == MO_64) {
4137                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4138                 } else {
4139                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4140                 }
4141                 if (b == 0x1f7) {
4142                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4143                 } else if (b == 0x2f7) {
4144                     if (ot != MO_64) {
4145                         tcg_gen_ext32s_tl(s->T0, s->T0);
4146                     }
4147                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4148                 } else {
4149                     if (ot != MO_64) {
4150                         tcg_gen_ext32u_tl(s->T0, s->T0);
4151                     }
4152                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4153                 }
4154                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4155                 break;
4156 
4157             case 0x0f3:
4158             case 0x1f3:
4159             case 0x2f3:
4160             case 0x3f3: /* Group 17 */
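                     /* BMI1 group 17: blsr is x & (x - 1) (clear lowest
                        set bit), blsmsk is x ^ (x - 1) (mask up to and
                        including lowest set bit), blsi is x & -x (isolate
                        lowest set bit).  */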
4161                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4162                     || !(s->prefix & PREFIX_VEX)
4163                     || s->vex_l != 0) {
4164                     goto illegal_op;
4165                 }
4166                 ot = mo_64_32(s->dflag);
4167                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4168 
4169                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4170                 switch (reg & 7) {
4171                 case 1: /* blsr By,Ey */
4172                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4173                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4174                     break;
4175                 case 2: /* blsmsk By,Ey */
4176                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4177                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4178                     break;
4179                 case 3: /* blsi By, Ey */
4180                     tcg_gen_neg_tl(s->T1, s->T0);
4181                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4182                     break;
4183                 default:
4184                     goto unknown_op;
4185                 }
4186                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4187                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4188                 set_cc_op(s, CC_OP_BMILGB + ot);
4189                 break;
4190 
4191             default:
4192                 goto unknown_op;
4193             }
4194             break;
4195 
4196         case 0x03a:
4197         case 0x13a:
4198             b = modrm;
4199             modrm = x86_ldub_code(env, s);
4200             rm = modrm & 7;
4201             reg = ((modrm >> 3) & 7) | REX_R(s);
4202             mod = (modrm >> 6) & 3;
4203             if (b1 >= 2) {
4204                 goto unknown_op;
4205             }
4206 
4207             sse_fn_eppi = sse_op_table7[b].op[b1];
4208             if (!sse_fn_eppi) {
4209                 goto unknown_op;
4210             }
4211             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4212                 goto illegal_op;
4213 
4214             s->rip_offset = 1;
4215 
4216             if (sse_fn_eppi == SSE_SPECIAL) {
4217                 ot = mo_64_32(s->dflag);
4218                 rm = (modrm & 7) | REX_B(s);
4219                 if (mod != 3)
4220                     gen_lea_modrm(env, s, modrm);
4221                 reg = ((modrm >> 3) & 7) | REX_R(s);
4222                 val = x86_ldub_code(env, s);
4223                 switch (b) {
4224                 case 0x14: /* pextrb */
4225                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4226                                             xmm_regs[reg].ZMM_B(val & 15)));
4227                     if (mod == 3) {
4228                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4229                     } else {
4230                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4231                                            s->mem_index, MO_UB);
4232                     }
4233                     break;
4234                 case 0x15: /* pextrw */
4235                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4236                                             xmm_regs[reg].ZMM_W(val & 7)));
4237                     if (mod == 3) {
4238                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4239                     } else {
4240                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4241                                            s->mem_index, MO_LEUW);
4242                     }
4243                     break;
4244                 case 0x16:
4245                     if (ot == MO_32) { /* pextrd */
4246                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4247                                         offsetof(CPUX86State,
4248                                                 xmm_regs[reg].ZMM_L(val & 3)));
4249                         if (mod == 3) {
4250                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4251                         } else {
4252                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4253                                                 s->mem_index, MO_LEUL);
4254                         }
4255                     } else { /* pextrq */
4256 #ifdef TARGET_X86_64
4257                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4258                                         offsetof(CPUX86State,
4259                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4260                         if (mod == 3) {
4261                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4262                         } else {
4263                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4264                                                 s->mem_index, MO_LEQ);
4265                         }
4266 #else
4267                         goto illegal_op;
4268 #endif
4269                     }
4270                     break;
4271                 case 0x17: /* extractps */
4272                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4273                                             xmm_regs[reg].ZMM_L(val & 3)));
4274                     if (mod == 3) {
4275                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4276                     } else {
4277                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4278                                            s->mem_index, MO_LEUL);
4279                     }
4280                     break;
4281                 case 0x20: /* pinsrb */
4282                     if (mod == 3) {
4283                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4284                     } else {
4285                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4286                                            s->mem_index, MO_UB);
4287                     }
4288                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4289                                             xmm_regs[reg].ZMM_B(val & 15)));
4290                     break;
4291                 case 0x21: /* insertps */
4292                     if (mod == 3) {
4293                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4294                                         offsetof(CPUX86State,xmm_regs[rm]
4295                                                 .ZMM_L((val >> 6) & 3)));
4296                     } else {
4297                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4298                                             s->mem_index, MO_LEUL);
4299                     }
4300                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4301                                     offsetof(CPUX86State,xmm_regs[reg]
4302                                             .ZMM_L((val >> 4) & 3)));
4303                     if ((val >> 0) & 1)
4304                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4305                                         cpu_env, offsetof(CPUX86State,
4306                                                 xmm_regs[reg].ZMM_L(0)));
4307                     if ((val >> 1) & 1)
4308                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4309                                         cpu_env, offsetof(CPUX86State,
4310                                                 xmm_regs[reg].ZMM_L(1)));
4311                     if ((val >> 2) & 1)
4312                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4313                                         cpu_env, offsetof(CPUX86State,
4314                                                 xmm_regs[reg].ZMM_L(2)));
4315                     if ((val >> 3) & 1)
4316                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4317                                         cpu_env, offsetof(CPUX86State,
4318                                                 xmm_regs[reg].ZMM_L(3)));
4319                     break;
4320                 case 0x22:
4321                     if (ot == MO_32) { /* pinsrd */
4322                         if (mod == 3) {
4323                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4324                         } else {
4325                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4326                                                 s->mem_index, MO_LEUL);
4327                         }
4328                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4329                                         offsetof(CPUX86State,
4330                                                 xmm_regs[reg].ZMM_L(val & 3)));
4331                     } else { /* pinsrq */
4332 #ifdef TARGET_X86_64
4333                         if (mod == 3) {
4334                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4335                         } else {
4336                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4337                                                 s->mem_index, MO_LEQ);
4338                         }
4339                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4340                                         offsetof(CPUX86State,
4341                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4342 #else
4343                         goto illegal_op;
4344 #endif
4345                     }
4346                     break;
4347                 }
4348                 return;
4349             }
4350 
4351             if (b1) {
4352                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4353                 if (mod == 3) {
4354                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4355                 } else {
4356                     op2_offset = offsetof(CPUX86State,xmm_t0);
4357                     gen_lea_modrm(env, s, modrm);
4358                     gen_ldo_env_A0(s, op2_offset);
4359                 }
4360             } else {
4361                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4362                 if (mod == 3) {
4363                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4364                 } else {
4365                     op2_offset = offsetof(CPUX86State,mmx_t0);
4366                     gen_lea_modrm(env, s, modrm);
4367                     gen_ldq_env_A0(s, op2_offset);
4368                 }
4369             }
4370             val = x86_ldub_code(env, s);
4371 
4372             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4373                 set_cc_op(s, CC_OP_EFLAGS);
4374 
4375                 if (s->dflag == MO_64) {
4376                     /* The helper must use entire 64-bit gp registers */
4377                     val |= 1 << 8;
4378                 }
4379             }
4380 
4381             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4382             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4383             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4384             break;
4385 
4386         case 0x33a:
4387             /* Various integer extensions at 0f 3a f[0-f].  */
4388             b = modrm | (b1 << 8);
4389             modrm = x86_ldub_code(env, s);
4390             reg = ((modrm >> 3) & 7) | REX_R(s);
4391 
4392             switch (b) {
4393             case 0x3f0: /* rorx Gy,Ey, Ib */
4394                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4395                     || !(s->prefix & PREFIX_VEX)
4396                     || s->vex_l != 0) {
4397                     goto illegal_op;
4398                 }
4399                 ot = mo_64_32(s->dflag);
4400                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4401                 b = x86_ldub_code(env, s);
4402                 if (ot == MO_64) {
4403                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4404                 } else {
4405                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4406                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4407                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4408                 }
4409                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4410                 break;
4411 
4412             default:
4413                 goto unknown_op;
4414             }
4415             break;
4416 
4417         default:
4418         unknown_op:
4419             gen_unknown_opcode(env, s);
4420             return;
4421         }
4422     } else {
4423         /* generic MMX or SSE operation */
4424         switch(b) {
4425         case 0x70: /* pshufx insn */
4426         case 0xc6: /* pshufx insn */
4427         case 0xc2: /* compare insns */
4428             s->rip_offset = 1;
4429             break;
4430         default:
4431             break;
4432         }
4433         if (is_xmm) {
4434             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4435             if (mod != 3) {
4436                 int sz = 4;
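                     /* sz is log2 of the memory operand size: 2 for a
                        32-bit scalar, 3 for a 64-bit scalar, 4 for the
                        full 128-bit vector.  */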
4437 
4438                 gen_lea_modrm(env, s, modrm);
4439                 op2_offset = offsetof(CPUX86State,xmm_t0);
4440 
4441                 switch (b) {
4442                 case 0x50 ... 0x5a:
4443                 case 0x5c ... 0x5f:
4444                 case 0xc2:
4445                     /* Most sse scalar operations.  */
4446                     if (b1 == 2) {
4447                         sz = 2;
4448                     } else if (b1 == 3) {
4449                         sz = 3;
4450                     }
4451                     break;
4452 
4453                 case 0x2e:  /* ucomis[sd] */
4454                 case 0x2f:  /* comis[sd] */
4455                     if (b1 == 0) {
4456                         sz = 2;
4457                     } else {
4458                         sz = 3;
4459                     }
4460                     break;
4461                 }
4462 
4463                 switch (sz) {
4464                 case 2:
4465                     /* 32 bit access */
4466                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4467                     tcg_gen_st32_tl(s->T0, cpu_env,
4468                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4469                     break;
4470                 case 3:
4471                     /* 64 bit access */
4472                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4473                     break;
4474                 default:
4475                     /* 128 bit access */
4476                     gen_ldo_env_A0(s, op2_offset);
4477                     break;
4478                 }
4479             } else {
4480                 rm = (modrm & 7) | REX_B(s);
4481                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4482             }
4483         } else {
4484             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4485             if (mod != 3) {
4486                 gen_lea_modrm(env, s, modrm);
4487                 op2_offset = offsetof(CPUX86State,mmx_t0);
4488                 gen_ldq_env_A0(s, op2_offset);
4489             } else {
4490                 rm = (modrm & 7);
4491                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4492             }
4493         }
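             /* op1_offset/op2_offset now hold the CPUX86State offsets of
                the destination and source operands; a memory source has
                been loaded into xmm_t0/mmx_t0 above. */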
4494         switch(b) {
4495         case 0x0f: /* 3DNow! data insns */
4496             val = x86_ldub_code(env, s);
4497             sse_fn_epp = sse_op_table5[val];
4498             if (!sse_fn_epp) {
4499                 goto unknown_op;
4500             }
4501             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4502                 goto illegal_op;
4503             }
4504             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4505             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4506             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4507             break;
4508         case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4509         case 0xc6: /* shufps/shufpd */
4510             val = x86_ldub_code(env, s);
4511             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4512             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4513             /* XXX: introduce a new table? */
4514             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4515             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4516             break;
4517         case 0xc2:
4518             /* compare insns */
4519             val = x86_ldub_code(env, s);
4520             if (val >= 8)
4521                 goto unknown_op;
4522             sse_fn_epp = sse_op_table4[val][b1];
4523 
4524             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4525             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4526             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4527             break;
4528         case 0xf7:
4529             /* maskmov: the store goes through DS:(E)DI, so prepare A0 */
4530             if (mod != 3)
4531                 goto illegal_op;
4532             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4533             gen_extu(s->aflag, s->A0);
4534             gen_add_A0_ds_seg(s);
4535 
4536             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4537             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4538             /* XXX: introduce a new table? */
4539             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4540             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4541             break;
4542         default:
4543             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4544             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4545             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4546             break;
4547         }
4548         if (b == 0x2e || b == 0x2f) {
4549             set_cc_op(s, CC_OP_EFLAGS);
4550         }
4551     }
4552 }
4553 
4554 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4555    be stopped.  Returns the next pc value.  */
4556 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4557 {
4558     CPUX86State *env = cpu->env_ptr;
4559     int b, prefixes;
4560     int shift;
4561     MemOp ot, aflag, dflag;
4562     int modrm, reg, rm, mod, op, opreg, val;
4563     target_ulong next_eip, tval;
4564     target_ulong pc_start = s->base.pc_next;
4565 
4566     s->pc_start = s->pc = pc_start;
4567     s->override = -1;
4568 #ifdef TARGET_X86_64
4569     s->rex_w = false;
4570     s->rex_r = 0;
4571     s->rex_x = 0;
4572     s->rex_b = 0;
4573 #endif
4574     s->rip_offset = 0; /* for relative ip address */
4575     s->vex_l = 0;
4576     s->vex_v = 0;
4577     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4578         gen_exception_gpf(s);
4579         return s->pc;
4580     }
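         /* The code fetch helpers siglongjmp() back to the sigsetjmp()
            above when the instruction being decoded grows beyond the
            architectural 15-byte limit; it is then reported as #GP. */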
4581 
4582     prefixes = 0;
4583 
4584  next_byte:
4585     b = x86_ldub_code(env, s);
4586     /* Collect prefixes.  */
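     /* Legacy prefixes may appear in any order and may repeat; keep
        looping via next_byte until a non-prefix byte is seen. */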
4587     switch (b) {
4588     case 0xf3:
4589         prefixes |= PREFIX_REPZ;
4590         goto next_byte;
4591     case 0xf2:
4592         prefixes |= PREFIX_REPNZ;
4593         goto next_byte;
4594     case 0xf0:
4595         prefixes |= PREFIX_LOCK;
4596         goto next_byte;
4597     case 0x2e:
4598         s->override = R_CS;
4599         goto next_byte;
4600     case 0x36:
4601         s->override = R_SS;
4602         goto next_byte;
4603     case 0x3e:
4604         s->override = R_DS;
4605         goto next_byte;
4606     case 0x26:
4607         s->override = R_ES;
4608         goto next_byte;
4609     case 0x64:
4610         s->override = R_FS;
4611         goto next_byte;
4612     case 0x65:
4613         s->override = R_GS;
4614         goto next_byte;
4615     case 0x66:
4616         prefixes |= PREFIX_DATA;
4617         goto next_byte;
4618     case 0x67:
4619         prefixes |= PREFIX_ADR;
4620         goto next_byte;
4621 #ifdef TARGET_X86_64
4622     case 0x40 ... 0x4f:
4623         if (CODE64(s)) {
4624             /* REX prefix */
4625             prefixes |= PREFIX_REX;
4626             s->rex_w = (b >> 3) & 1;
4627             s->rex_r = (b & 0x4) << 1;
4628             s->rex_x = (b & 0x2) << 2;
4629             s->rex_b = (b & 0x1) << 3;
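             /* R, X and B are shifted to bit position 3 so they can be
                ORed directly into the 3-bit modrm register fields. */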
4630             goto next_byte;
4631         }
4632         break;
4633 #endif
4634     case 0xc5: /* 2-byte VEX */
4635     case 0xc4: /* 3-byte VEX */
4636         /* VEX prefixes are valid only in 32-bit and 64-bit mode;
4637            otherwise these bytes decode as LES or LDS.  */
4638         if (CODE32(s) && !VM86(s)) {
4639             static const int pp_prefix[4] = {
4640                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4641             };
4642             int vex3, vex2 = x86_ldub_code(env, s);
4643 
4644             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4645                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4646                    otherwise the instruction is LES or LDS.  */
4647                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4648                 break;
4649             }
4650 
4651             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4652             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4653                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4654                 goto illegal_op;
4655             }
4656 #ifdef TARGET_X86_64
4657             s->rex_r = (~vex2 >> 4) & 8;
4658 #endif
4659             if (b == 0xc5) {
4660                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4661                 vex3 = vex2;
4662                 b = x86_ldub_code(env, s) | 0x100;
4663             } else {
4664                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4665                 vex3 = x86_ldub_code(env, s);
4666 #ifdef TARGET_X86_64
4667                 s->rex_x = (~vex2 >> 3) & 8;
4668                 s->rex_b = (~vex2 >> 2) & 8;
4669                 s->rex_w = (vex3 >> 7) & 1;
4670 #endif
4671                 switch (vex2 & 0x1f) {
4672                 case 0x01: /* Implied 0f leading opcode bytes.  */
4673                     b = x86_ldub_code(env, s) | 0x100;
4674                     break;
4675                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4676                     b = 0x138;
4677                     break;
4678                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4679                     b = 0x13a;
4680                     break;
4681                 default:   /* Reserved for future use.  */
4682                     goto unknown_op;
4683                 }
4684             }
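             /* VVVV is encoded inverted; undo the complement so vex_v
                holds the plain register number. */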
4685             s->vex_v = (~vex3 >> 3) & 0xf;
4686             s->vex_l = (vex3 >> 2) & 1;
4687             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4688         }
4689         break;
4690     }
4691 
4692     /* Post-process prefixes.  */
4693     if (CODE64(s)) {
4694         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4695            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4696            over 0x66 if both are present.  */
4697         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4698         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4699         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4700     } else {
4701         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4702         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4703             dflag = MO_32;
4704         } else {
4705             dflag = MO_16;
4706         }
4707         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4708         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4709             aflag = MO_32;
4710         } else {
4711             aflag = MO_16;
4712         }
4713     }
4714 
4715     s->prefix = prefixes;
4716     s->aflag = aflag;
4717     s->dflag = dflag;
4718 
4719     /* now check op code */
4720  reswitch:
4721     switch(b) {
4722     case 0x0f:
4723         /**************************/
4724         /* extended op code */
4725         b = x86_ldub_code(env, s) | 0x100;
4726         goto reswitch;
4727 
4728         /**************************/
4729         /* arith & logic */
4730     case 0x00 ... 0x05:
4731     case 0x08 ... 0x0d:
4732     case 0x10 ... 0x15:
4733     case 0x18 ... 0x1d:
4734     case 0x20 ... 0x25:
4735     case 0x28 ... 0x2d:
4736     case 0x30 ... 0x35:
4737     case 0x38 ... 0x3d:
4738         {
4739             int op, f, val;
4740             op = (b >> 3) & 7;
4741             f = (b >> 1) & 3;
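             /* Bits 5:3 of the opcode select the ALU op (add, or, adc,
                sbb, and, sub, xor, cmp); bits 2:1 select the form:
                0 = Ev,Gv  1 = Gv,Ev  2 = AL/eAX,imm. */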
4742 
4743             ot = mo_b_d(b, dflag);
4744 
4745             switch(f) {
4746             case 0: /* OP Ev, Gv */
4747                 modrm = x86_ldub_code(env, s);
4748                 reg = ((modrm >> 3) & 7) | REX_R(s);
4749                 mod = (modrm >> 6) & 3;
4750                 rm = (modrm & 7) | REX_B(s);
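                 /* modrm layout: [7:6] = mod, [5:3] = reg, [2:0] = rm;
                    REX.R and REX.B extend reg and rm to 4 bits. */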
4751                 if (mod != 3) {
4752                     gen_lea_modrm(env, s, modrm);
4753                     opreg = OR_TMP0;
4754                 } else if (op == OP_XORL && rm == reg) {
4755                 xor_zero:
4756                     /* xor reg, reg optimisation */
4757                     set_cc_op(s, CC_OP_CLR);
4758                     tcg_gen_movi_tl(s->T0, 0);
4759                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4760                     break;
4761                 } else {
4762                     opreg = rm;
4763                 }
4764                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4765                 gen_op(s, op, ot, opreg);
4766                 break;
4767             case 1: /* OP Gv, Ev */
4768                 modrm = x86_ldub_code(env, s);
4769                 mod = (modrm >> 6) & 3;
4770                 reg = ((modrm >> 3) & 7) | REX_R(s);
4771                 rm = (modrm & 7) | REX_B(s);
4772                 if (mod != 3) {
4773                     gen_lea_modrm(env, s, modrm);
4774                     gen_op_ld_v(s, ot, s->T1, s->A0);
4775                 } else if (op == OP_XORL && rm == reg) {
4776                     goto xor_zero;
4777                 } else {
4778                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4779                 }
4780                 gen_op(s, op, ot, reg);
4781                 break;
4782             case 2: /* OP A, Iv */
4783                 val = insn_get(env, s, ot);
4784                 tcg_gen_movi_tl(s->T1, val);
4785                 gen_op(s, op, ot, OR_EAX);
4786                 break;
4787             }
4788         }
4789         break;
4790 
4791     case 0x82:
4792         if (CODE64(s))
4793             goto illegal_op;
4794         /* fall through */
4795     case 0x80: /* GRP1 */
4796     case 0x81:
4797     case 0x83:
4798         {
4799             int val;
4800 
4801             ot = mo_b_d(b, dflag);
4802 
4803             modrm = x86_ldub_code(env, s);
4804             mod = (modrm >> 6) & 3;
4805             rm = (modrm & 7) | REX_B(s);
4806             op = (modrm >> 3) & 7;
4807 
4808             if (mod != 3) {
4809                 if (b == 0x83)
4810                     s->rip_offset = 1;
4811                 else
4812                     s->rip_offset = insn_const_size(ot);
4813                 gen_lea_modrm(env, s, modrm);
4814                 opreg = OR_TMP0;
4815             } else {
4816                 opreg = rm;
4817             }
4818 
4819             switch(b) {
4820             default:
4821             case 0x80:
4822             case 0x81:
4823             case 0x82:
4824                 val = insn_get(env, s, ot);
4825                 break;
4826             case 0x83:
4827                 val = (int8_t)insn_get(env, s, MO_8);
4828                 break;
4829             }
4830             tcg_gen_movi_tl(s->T1, val);
4831             gen_op(s, op, ot, opreg);
4832         }
4833         break;
4834 
4835         /**************************/
4836         /* inc, dec, and other misc arith */
4837     case 0x40 ... 0x47: /* inc Gv */
4838         ot = dflag;
4839         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4840         break;
4841     case 0x48 ... 0x4f: /* dec Gv */
4842         ot = dflag;
4843         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4844         break;
4845     case 0xf6: /* GRP3 */
4846     case 0xf7:
4847         ot = mo_b_d(b, dflag);
4848 
4849         modrm = x86_ldub_code(env, s);
4850         mod = (modrm >> 6) & 3;
4851         rm = (modrm & 7) | REX_B(s);
4852         op = (modrm >> 3) & 7;
4853         if (mod != 3) {
4854             if (op == 0) {
4855                 s->rip_offset = insn_const_size(ot);
4856             }
4857             gen_lea_modrm(env, s, modrm);
4858             /* For those below that handle locked memory, don't load here.  */
4859             if (!(s->prefix & PREFIX_LOCK)
4860                 || op != 2) {
4861                 gen_op_ld_v(s, ot, s->T0, s->A0);
4862             }
4863         } else {
4864             gen_op_mov_v_reg(s, ot, s->T0, rm);
4865         }
4866 
4867         switch(op) {
4868         case 0: /* test */
4869             val = insn_get(env, s, ot);
4870             tcg_gen_movi_tl(s->T1, val);
4871             gen_op_testl_T0_T1_cc(s);
4872             set_cc_op(s, CC_OP_LOGICB + ot);
4873             break;
4874         case 2: /* not */
4875             if (s->prefix & PREFIX_LOCK) {
4876                 if (mod == 3) {
4877                     goto illegal_op;
4878                 }
4879                 tcg_gen_movi_tl(s->T0, ~0);
4880                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4881                                             s->mem_index, ot | MO_LE);
4882             } else {
4883                 tcg_gen_not_tl(s->T0, s->T0);
4884                 if (mod != 3) {
4885                     gen_op_st_v(s, ot, s->T0, s->A0);
4886                 } else {
4887                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4888                 }
4889             }
4890             break;
4891         case 3: /* neg */
4892             if (s->prefix & PREFIX_LOCK) {
4893                 TCGLabel *label1;
4894                 TCGv a0, t0, t1, t2;
4895 
4896                 if (mod == 3) {
4897                     goto illegal_op;
4898                 }
4899                 a0 = tcg_temp_local_new();
4900                 t0 = tcg_temp_local_new();
4901                 label1 = gen_new_label();
4902 
4903                 tcg_gen_mov_tl(a0, s->A0);
4904                 tcg_gen_mov_tl(t0, s->T0);
4905 
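                 /* Locked NEG becomes a compare-and-swap loop: retry
                    until the memory operand is replaced by its negation
                    with no intervening store. */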
4906                 gen_set_label(label1);
4907                 t1 = tcg_temp_new();
4908                 t2 = tcg_temp_new();
4909                 tcg_gen_mov_tl(t2, t0);
4910                 tcg_gen_neg_tl(t1, t0);
4911                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4912                                           s->mem_index, ot | MO_LE);
4913                 tcg_temp_free(t1);
4914                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4915 
4916                 tcg_temp_free(t2);
4917                 tcg_temp_free(a0);
4918                 tcg_gen_mov_tl(s->T0, t0);
4919                 tcg_temp_free(t0);
4920             } else {
4921                 tcg_gen_neg_tl(s->T0, s->T0);
4922                 if (mod != 3) {
4923                     gen_op_st_v(s, ot, s->T0, s->A0);
4924                 } else {
4925                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4926                 }
4927             }
4928             gen_op_update_neg_cc(s);
4929             set_cc_op(s, CC_OP_SUBB + ot);
4930             break;
4931         case 4: /* mul */
4932             switch(ot) {
4933             case MO_8:
4934                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4935                 tcg_gen_ext8u_tl(s->T0, s->T0);
4936                 tcg_gen_ext8u_tl(s->T1, s->T1);
4937                 /* XXX: use 32 bit mul which could be faster */
4938                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4939                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4940                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4941                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4942                 set_cc_op(s, CC_OP_MULB);
4943                 break;
4944             case MO_16:
4945                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4946                 tcg_gen_ext16u_tl(s->T0, s->T0);
4947                 tcg_gen_ext16u_tl(s->T1, s->T1);
4948                 /* XXX: use 32 bit mul which could be faster */
4949                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4950                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4951                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4952                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4953                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4954                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4955                 set_cc_op(s, CC_OP_MULW);
4956                 break;
4957             default:
4958             case MO_32:
4959                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4960                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4961                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4962                                   s->tmp2_i32, s->tmp3_i32);
4963                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4964                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4965                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4966                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4967                 set_cc_op(s, CC_OP_MULL);
4968                 break;
4969 #ifdef TARGET_X86_64
4970             case MO_64:
4971                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4972                                   s->T0, cpu_regs[R_EAX]);
4973                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4974                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4975                 set_cc_op(s, CC_OP_MULQ);
4976                 break;
4977 #endif
4978             }
4979             break;
4980         case 5: /* imul */
4981             switch(ot) {
4982             case MO_8:
4983                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4984                 tcg_gen_ext8s_tl(s->T0, s->T0);
4985                 tcg_gen_ext8s_tl(s->T1, s->T1);
4986                 /* XXX: use 32 bit mul which could be faster */
4987                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4988                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4989                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4990                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4991                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
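                 /* cc_src is non-zero iff the product does not fit in
                    the signed 8-bit destination; CF and OF are derived
                    from it. */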
4992                 set_cc_op(s, CC_OP_MULB);
4993                 break;
4994             case MO_16:
4995                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4996                 tcg_gen_ext16s_tl(s->T0, s->T0);
4997                 tcg_gen_ext16s_tl(s->T1, s->T1);
4998                 /* XXX: use 32 bit mul which could be faster */
4999                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5000                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5001                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5002                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5003                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5004                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5005                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5006                 set_cc_op(s, CC_OP_MULW);
5007                 break;
5008             default:
5009             case MO_32:
5010                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5011                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5012                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5013                                   s->tmp2_i32, s->tmp3_i32);
5014                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5015                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5016                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5017                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5018                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5019                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5020                 set_cc_op(s, CC_OP_MULL);
5021                 break;
5022 #ifdef TARGET_X86_64
5023             case MO_64:
5024                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5025                                   s->T0, cpu_regs[R_EAX]);
5026                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5027                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5028                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5029                 set_cc_op(s, CC_OP_MULQ);
5030                 break;
5031 #endif
5032             }
5033             break;
5034         case 6: /* div */
5035             switch(ot) {
5036             case MO_8:
5037                 gen_helper_divb_AL(cpu_env, s->T0);
5038                 break;
5039             case MO_16:
5040                 gen_helper_divw_AX(cpu_env, s->T0);
5041                 break;
5042             default:
5043             case MO_32:
5044                 gen_helper_divl_EAX(cpu_env, s->T0);
5045                 break;
5046 #ifdef TARGET_X86_64
5047             case MO_64:
5048                 gen_helper_divq_EAX(cpu_env, s->T0);
5049                 break;
5050 #endif
5051             }
5052             break;
5053         case 7: /* idiv */
5054             switch(ot) {
5055             case MO_8:
5056                 gen_helper_idivb_AL(cpu_env, s->T0);
5057                 break;
5058             case MO_16:
5059                 gen_helper_idivw_AX(cpu_env, s->T0);
5060                 break;
5061             default:
5062             case MO_32:
5063                 gen_helper_idivl_EAX(cpu_env, s->T0);
5064                 break;
5065 #ifdef TARGET_X86_64
5066             case MO_64:
5067                 gen_helper_idivq_EAX(cpu_env, s->T0);
5068                 break;
5069 #endif
5070             }
5071             break;
5072         default:
5073             goto unknown_op;
5074         }
5075         break;
5076 
5077     case 0xfe: /* GRP4 */
5078     case 0xff: /* GRP5 */
5079         ot = mo_b_d(b, dflag);
5080 
5081         modrm = x86_ldub_code(env, s);
5082         mod = (modrm >> 6) & 3;
5083         rm = (modrm & 7) | REX_B(s);
5084         op = (modrm >> 3) & 7;
5085         if (op >= 2 && b == 0xfe) {
5086             goto unknown_op;
5087         }
5088         if (CODE64(s)) {
5089             if (op == 2 || op == 4) {
5090                 /* operand size for jumps is 64 bit */
5091                 ot = MO_64;
5092             } else if (op == 3 || op == 5) {
5093                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5094             } else if (op == 6) {
5095                 /* default push size is 64 bit */
5096                 ot = mo_pushpop(s, dflag);
5097             }
5098         }
5099         if (mod != 3) {
5100             gen_lea_modrm(env, s, modrm);
5101             if (op >= 2 && op != 3 && op != 5)
5102                 gen_op_ld_v(s, ot, s->T0, s->A0);
5103         } else {
5104             gen_op_mov_v_reg(s, ot, s->T0, rm);
5105         }
5106 
5107         switch(op) {
5108         case 0: /* inc Ev */
5109             if (mod != 3)
5110                 opreg = OR_TMP0;
5111             else
5112                 opreg = rm;
5113             gen_inc(s, ot, opreg, 1);
5114             break;
5115         case 1: /* dec Ev */
5116             if (mod != 3)
5117                 opreg = OR_TMP0;
5118             else
5119                 opreg = rm;
5120             gen_inc(s, ot, opreg, -1);
5121             break;
5122         case 2: /* call Ev */
5123             /* XXX: optimize if memory (no 'and' is necessary) */
5124             if (dflag == MO_16) {
5125                 tcg_gen_ext16u_tl(s->T0, s->T0);
5126             }
5127             next_eip = s->pc - s->cs_base;
5128             tcg_gen_movi_tl(s->T1, next_eip);
5129             gen_push_v(s, s->T1);
5130             gen_op_jmp_v(s->T0);
5131             gen_bnd_jmp(s);
5132             gen_jr(s, s->T0);
5133             break;
5134         case 3: /* lcall Ev */
5135             if (mod == 3) {
5136                 goto illegal_op;
5137             }
5138             gen_op_ld_v(s, ot, s->T1, s->A0);
5139             gen_add_A0_im(s, 1 << ot);
5140             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5141         do_lcall:
5142             if (PE(s) && !VM86(s)) {
5143                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5144                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5145                                            tcg_const_i32(dflag - 1),
5146                                            tcg_const_tl(s->pc - s->cs_base));
5147             } else {
5148                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5149                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5150                                       tcg_const_i32(dflag - 1),
5151                                       tcg_const_i32(s->pc - s->cs_base));
5152             }
5153             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5154             gen_jr(s, s->tmp4);
5155             break;
5156         case 4: /* jmp Ev */
5157             if (dflag == MO_16) {
5158                 tcg_gen_ext16u_tl(s->T0, s->T0);
5159             }
5160             gen_op_jmp_v(s->T0);
5161             gen_bnd_jmp(s);
5162             gen_jr(s, s->T0);
5163             break;
5164         case 5: /* ljmp Ev */
5165             if (mod == 3) {
5166                 goto illegal_op;
5167             }
5168             gen_op_ld_v(s, ot, s->T1, s->A0);
5169             gen_add_A0_im(s, 1 << ot);
5170             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5171         do_ljmp:
5172             if (PE(s) && !VM86(s)) {
5173                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5174                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5175                                           tcg_const_tl(s->pc - s->cs_base));
5176             } else {
5177                 gen_op_movl_seg_T0_vm(s, R_CS);
5178                 gen_op_jmp_v(s->T1);
5179             }
5180             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5181             gen_jr(s, s->tmp4);
5182             break;
5183         case 6: /* push Ev */
5184             gen_push_v(s, s->T0);
5185             break;
5186         default:
5187             goto unknown_op;
5188         }
5189         break;
5190 
5191     case 0x84: /* test Ev, Gv */
5192     case 0x85:
5193         ot = mo_b_d(b, dflag);
5194 
5195         modrm = x86_ldub_code(env, s);
5196         reg = ((modrm >> 3) & 7) | REX_R(s);
5197 
5198         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5199         gen_op_mov_v_reg(s, ot, s->T1, reg);
5200         gen_op_testl_T0_T1_cc(s);
5201         set_cc_op(s, CC_OP_LOGICB + ot);
5202         break;
5203 
5204     case 0xa8: /* test eAX, Iv */
5205     case 0xa9:
5206         ot = mo_b_d(b, dflag);
5207         val = insn_get(env, s, ot);
5208 
5209         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5210         tcg_gen_movi_tl(s->T1, val);
5211         gen_op_testl_T0_T1_cc(s);
5212         set_cc_op(s, CC_OP_LOGICB + ot);
5213         break;
5214 
5215     case 0x98: /* CWDE/CBW */
5216         switch (dflag) {
5217 #ifdef TARGET_X86_64
5218         case MO_64:
5219             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5220             tcg_gen_ext32s_tl(s->T0, s->T0);
5221             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5222             break;
5223 #endif
5224         case MO_32:
5225             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5226             tcg_gen_ext16s_tl(s->T0, s->T0);
5227             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5228             break;
5229         case MO_16:
5230             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5231             tcg_gen_ext8s_tl(s->T0, s->T0);
5232             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5233             break;
5234         default:
5235             tcg_abort();
5236         }
5237         break;
5238     case 0x99: /* CDQ/CWD */
5239         switch (dflag) {
5240 #ifdef TARGET_X86_64
5241         case MO_64:
5242             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5243             tcg_gen_sari_tl(s->T0, s->T0, 63);
5244             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5245             break;
5246 #endif
5247         case MO_32:
5248             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5249             tcg_gen_ext32s_tl(s->T0, s->T0);
5250             tcg_gen_sari_tl(s->T0, s->T0, 31);
5251             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5252             break;
5253         case MO_16:
5254             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5255             tcg_gen_ext16s_tl(s->T0, s->T0);
5256             tcg_gen_sari_tl(s->T0, s->T0, 15);
5257             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5258             break;
5259         default:
5260             tcg_abort();
5261         }
5262         break;
5263     case 0x1af: /* imul Gv, Ev */
5264     case 0x69: /* imul Gv, Ev, I */
5265     case 0x6b:
5266         ot = dflag;
5267         modrm = x86_ldub_code(env, s);
5268         reg = ((modrm >> 3) & 7) | REX_R(s);
5269         if (b == 0x69)
5270             s->rip_offset = insn_const_size(ot);
5271         else if (b == 0x6b)
5272             s->rip_offset = 1;
5273         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5274         if (b == 0x69) {
5275             val = insn_get(env, s, ot);
5276             tcg_gen_movi_tl(s->T1, val);
5277         } else if (b == 0x6b) {
5278             val = (int8_t)insn_get(env, s, MO_8);
5279             tcg_gen_movi_tl(s->T1, val);
5280         } else {
5281             gen_op_mov_v_reg(s, ot, s->T1, reg);
5282         }
5283         switch (ot) {
5284 #ifdef TARGET_X86_64
5285         case MO_64:
5286             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5287             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5288             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5289             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5290             break;
5291 #endif
5292         case MO_32:
5293             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5294             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5295             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5296                               s->tmp2_i32, s->tmp3_i32);
5297             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5298             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5299             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5300             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5301             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5302             break;
5303         default:
5304             tcg_gen_ext16s_tl(s->T0, s->T0);
5305             tcg_gen_ext16s_tl(s->T1, s->T1);
5306             /* XXX: use 32 bit mul which could be faster */
5307             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5308             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5309             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5310             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5311             gen_op_mov_reg_v(s, ot, reg, s->T0);
5312             break;
5313         }
5314         set_cc_op(s, CC_OP_MULB + ot);
5315         break;
5316     case 0x1c0:
5317     case 0x1c1: /* xadd Ev, Gv */
5318         ot = mo_b_d(b, dflag);
5319         modrm = x86_ldub_code(env, s);
5320         reg = ((modrm >> 3) & 7) | REX_R(s);
5321         mod = (modrm >> 6) & 3;
5322         gen_op_mov_v_reg(s, ot, s->T0, reg);
5323         if (mod == 3) {
5324             rm = (modrm & 7) | REX_B(s);
5325             gen_op_mov_v_reg(s, ot, s->T1, rm);
5326             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5327             gen_op_mov_reg_v(s, ot, reg, s->T1);
5328             gen_op_mov_reg_v(s, ot, rm, s->T0);
5329         } else {
5330             gen_lea_modrm(env, s, modrm);
5331             if (s->prefix & PREFIX_LOCK) {
5332                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5333                                             s->mem_index, ot | MO_LE);
5334                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5335             } else {
5336                 gen_op_ld_v(s, ot, s->T1, s->A0);
5337                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5338                 gen_op_st_v(s, ot, s->T0, s->A0);
5339             }
5340             gen_op_mov_reg_v(s, ot, reg, s->T1);
5341         }
5342         gen_op_update2_cc(s);
5343         set_cc_op(s, CC_OP_ADDB + ot);
5344         break;
5345     case 0x1b0:
5346     case 0x1b1: /* cmpxchg Ev, Gv */
5347         {
5348             TCGv oldv, newv, cmpv;
5349 
5350             ot = mo_b_d(b, dflag);
5351             modrm = x86_ldub_code(env, s);
5352             reg = ((modrm >> 3) & 7) | REX_R(s);
5353             mod = (modrm >> 6) & 3;
5354             oldv = tcg_temp_new();
5355             newv = tcg_temp_new();
5356             cmpv = tcg_temp_new();
5357             gen_op_mov_v_reg(s, ot, newv, reg);
5358             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5359 
5360             if (s->prefix & PREFIX_LOCK) {
5361                 if (mod == 3) {
5362                     goto illegal_op;
5363                 }
5364                 gen_lea_modrm(env, s, modrm);
5365                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5366                                           s->mem_index, ot | MO_LE);
5367                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5368             } else {
5369                 if (mod == 3) {
5370                     rm = (modrm & 7) | REX_B(s);
5371                     gen_op_mov_v_reg(s, ot, oldv, rm);
5372                 } else {
5373                     gen_lea_modrm(env, s, modrm);
5374                     gen_op_ld_v(s, ot, oldv, s->A0);
5375                     rm = 0; /* avoid warning */
5376                 }
5377                 gen_extu(ot, oldv);
5378                 gen_extu(ot, cmpv);
5379                 /* store value = (old == cmp ? new : old);  */
5380                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5381                 if (mod == 3) {
5382                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5383                     gen_op_mov_reg_v(s, ot, rm, newv);
5384                 } else {
5385                     /* Perform an unconditional store cycle like a
5386                        physical CPU; it must happen before changing the
5387                        accumulator, to ensure idempotency if the store
5388                        faults and the instruction is restarted.  */
5389                     gen_op_st_v(s, ot, newv, s->A0);
5390                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5391                 }
5392             }
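             /* CMPXCHG sets the flags as CMP accumulator,dest would:
                compute cmpv - oldv via the SUB flag op. */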
5393             tcg_gen_mov_tl(cpu_cc_src, oldv);
5394             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5395             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5396             set_cc_op(s, CC_OP_SUBB + ot);
5397             tcg_temp_free(oldv);
5398             tcg_temp_free(newv);
5399             tcg_temp_free(cmpv);
5400         }
5401         break;
5402     case 0x1c7: /* cmpxchg8b */
5403         modrm = x86_ldub_code(env, s);
5404         mod = (modrm >> 6) & 3;
5405         switch ((modrm >> 3) & 7) {
5406         case 1: /* CMPXCHG8, CMPXCHG16 */
5407             if (mod == 3) {
5408                 goto illegal_op;
5409             }
5410 #ifdef TARGET_X86_64
5411             if (dflag == MO_64) {
5412                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5413                     goto illegal_op;
5414                 }
5415                 gen_lea_modrm(env, s, modrm);
5416                 if ((s->prefix & PREFIX_LOCK) &&
5417                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5418                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5419                 } else {
5420                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5421                 }
5422                 set_cc_op(s, CC_OP_EFLAGS);
5423                 break;
5424             }
5425 #endif
5426             if (!(s->cpuid_features & CPUID_CX8)) {
5427                 goto illegal_op;
5428             }
5429             gen_lea_modrm(env, s, modrm);
5430             if ((s->prefix & PREFIX_LOCK) &&
5431                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5432                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5433             } else {
5434                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5435             }
5436             set_cc_op(s, CC_OP_EFLAGS);
5437             break;
5438 
5439         case 7: /* RDSEED */
5440         case 6: /* RDRAND */
5441             if (mod != 3 ||
5442                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5443                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5444                 goto illegal_op;
5445             }
5446             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5447                 gen_io_start();
5448             }
5449             gen_helper_rdrand(s->T0, cpu_env);
5450             rm = (modrm & 7) | REX_B(s);
5451             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5452             set_cc_op(s, CC_OP_EFLAGS);
5453             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5454                 gen_jmp(s, s->pc - s->cs_base);
5455             }
5456             break;
5457 
5458         default:
5459             goto illegal_op;
5460         }
5461         break;
5462 
5463         /**************************/
5464         /* push/pop */
5465     case 0x50 ... 0x57: /* push */
5466         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5467         gen_push_v(s, s->T0);
5468         break;
5469     case 0x58 ... 0x5f: /* pop */
5470         ot = gen_pop_T0(s);
5471         /* NOTE: order is important for pop %sp */
5472         gen_pop_update(s, ot);
5473         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5474         break;
5475     case 0x60: /* pusha */
5476         if (CODE64(s))
5477             goto illegal_op;
5478         gen_pusha(s);
5479         break;
5480     case 0x61: /* popa */
5481         if (CODE64(s))
5482             goto illegal_op;
5483         gen_popa(s);
5484         break;
5485     case 0x68: /* push Iv */
5486     case 0x6a:
5487         ot = mo_pushpop(s, dflag);
5488         if (b == 0x68)
5489             val = insn_get(env, s, ot);
5490         else
5491             val = (int8_t)insn_get(env, s, MO_8);
5492         tcg_gen_movi_tl(s->T0, val);
5493         gen_push_v(s, s->T0);
5494         break;
5495     case 0x8f: /* pop Ev */
5496         modrm = x86_ldub_code(env, s);
5497         mod = (modrm >> 6) & 3;
5498         ot = gen_pop_T0(s);
5499         if (mod == 3) {
5500             /* NOTE: order is important for pop %sp */
5501             gen_pop_update(s, ot);
5502             rm = (modrm & 7) | REX_B(s);
5503             gen_op_mov_reg_v(s, ot, rm, s->T0);
5504         } else {
5505             /* NOTE: order is important too for MMU exceptions */
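             /* popl_esp_hack makes an ESP-based destination address see
                the stack pointer as already adjusted by the pop. */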
5506             s->popl_esp_hack = 1 << ot;
5507             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5508             s->popl_esp_hack = 0;
5509             gen_pop_update(s, ot);
5510         }
5511         break;
5512     case 0xc8: /* enter */
5513         {
5514             int level;
5515             val = x86_lduw_code(env, s);
5516             level = x86_ldub_code(env, s);
5517             gen_enter(s, val, level);
5518         }
5519         break;
5520     case 0xc9: /* leave */
5521         gen_leave(s);
5522         break;
5523     case 0x06: /* push es */
5524     case 0x0e: /* push cs */
5525     case 0x16: /* push ss */
5526     case 0x1e: /* push ds */
5527         if (CODE64(s))
5528             goto illegal_op;
5529         gen_op_movl_T0_seg(s, b >> 3);
5530         gen_push_v(s, s->T0);
5531         break;
5532     case 0x1a0: /* push fs */
5533     case 0x1a8: /* push gs */
5534         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5535         gen_push_v(s, s->T0);
5536         break;
5537     case 0x07: /* pop es */
5538     case 0x17: /* pop ss */
5539     case 0x1f: /* pop ds */
5540         if (CODE64(s))
5541             goto illegal_op;
5542         reg = b >> 3;
5543         ot = gen_pop_T0(s);
5544         gen_movl_seg_T0(s, reg);
5545         gen_pop_update(s, ot);
5546         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5547         if (s->base.is_jmp) {
5548             gen_jmp_im(s, s->pc - s->cs_base);
5549             if (reg == R_SS) {
5550                 s->flags &= ~HF_TF_MASK;
5551                 gen_eob_inhibit_irq(s, true);
5552             } else {
5553                 gen_eob(s);
5554             }
5555         }
5556         break;
5557     case 0x1a1: /* pop fs */
5558     case 0x1a9: /* pop gs */
5559         ot = gen_pop_T0(s);
5560         gen_movl_seg_T0(s, (b >> 3) & 7);
5561         gen_pop_update(s, ot);
5562         if (s->base.is_jmp) {
5563             gen_jmp_im(s, s->pc - s->cs_base);
5564             gen_eob(s);
5565         }
5566         break;
5567 
5568         /**************************/
5569         /* mov */
5570     case 0x88:
5571     case 0x89: /* mov Gv, Ev */
5572         ot = mo_b_d(b, dflag);
5573         modrm = x86_ldub_code(env, s);
5574         reg = ((modrm >> 3) & 7) | REX_R(s);
5575 
5576         /* generate a generic store */
5577         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5578         break;
5579     case 0xc6:
5580     case 0xc7: /* mov Ev, Iv */
5581         ot = mo_b_d(b, dflag);
5582         modrm = x86_ldub_code(env, s);
5583         mod = (modrm >> 6) & 3;
5584         if (mod != 3) {
5585             s->rip_offset = insn_const_size(ot);
5586             gen_lea_modrm(env, s, modrm);
5587         }
5588         val = insn_get(env, s, ot);
5589         tcg_gen_movi_tl(s->T0, val);
5590         if (mod != 3) {
5591             gen_op_st_v(s, ot, s->T0, s->A0);
5592         } else {
5593             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5594         }
5595         break;
5596     case 0x8a:
5597     case 0x8b: /* mov Ev, Gv */
5598         ot = mo_b_d(b, dflag);
5599         modrm = x86_ldub_code(env, s);
5600         reg = ((modrm >> 3) & 7) | REX_R(s);
5601 
5602         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5603         gen_op_mov_reg_v(s, ot, reg, s->T0);
5604         break;
5605     case 0x8e: /* mov seg, Gv */
5606         modrm = x86_ldub_code(env, s);
5607         reg = (modrm >> 3) & 7;
5608         if (reg >= 6 || reg == R_CS)
5609             goto illegal_op;
5610         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5611         gen_movl_seg_T0(s, reg);
5612         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5613         if (s->base.is_jmp) {
5614             gen_jmp_im(s, s->pc - s->cs_base);
5615             if (reg == R_SS) {
5616                 s->flags &= ~HF_TF_MASK;
5617                 gen_eob_inhibit_irq(s, true);
5618             } else {
5619                 gen_eob(s);
5620             }
5621         }
5622         break;
5623     case 0x8c: /* mov Gv, seg */
5624         modrm = x86_ldub_code(env, s);
5625         reg = (modrm >> 3) & 7;
5626         mod = (modrm >> 6) & 3;
5627         if (reg >= 6)
5628             goto illegal_op;
5629         gen_op_movl_T0_seg(s, reg);
5630         ot = mod == 3 ? dflag : MO_16;
5631         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5632         break;
5633 
5634     case 0x1b6: /* movzbS Gv, Eb */
5635     case 0x1b7: /* movzwS Gv, Ew */
5636     case 0x1be: /* movsbS Gv, Eb */
5637     case 0x1bf: /* movswS Gv, Ew */
5638         {
5639             MemOp d_ot;
5640             MemOp s_ot;
5641 
5642             /* d_ot is the size of destination */
5643             d_ot = dflag;
5644             /* ot is the size of source */
5645             ot = (b & 1) + MO_8;
5646             /* s_ot is the sign+size of source */
5647             s_ot = b & 8 ? MO_SIGN | ot : ot;
5648 
5649             modrm = x86_ldub_code(env, s);
5650             reg = ((modrm >> 3) & 7) | REX_R(s);
5651             mod = (modrm >> 6) & 3;
5652             rm = (modrm & 7) | REX_B(s);
5653 
5654             if (mod == 3) {
5655                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
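                     /* AH/CH/DH/BH live in bits 15:8 of the
                        corresponding legacy register; extract those
                        bits with sign extension. */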
5656                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5657                 } else {
5658                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5659                     switch (s_ot) {
5660                     case MO_UB:
5661                         tcg_gen_ext8u_tl(s->T0, s->T0);
5662                         break;
5663                     case MO_SB:
5664                         tcg_gen_ext8s_tl(s->T0, s->T0);
5665                         break;
5666                     case MO_UW:
5667                         tcg_gen_ext16u_tl(s->T0, s->T0);
5668                         break;
5669                     default:
5670                     case MO_SW:
5671                         tcg_gen_ext16s_tl(s->T0, s->T0);
5672                         break;
5673                     }
5674                 }
5675                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5676             } else {
5677                 gen_lea_modrm(env, s, modrm);
5678                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5679                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5680             }
5681         }
5682         break;
5683 
5684     case 0x8d: /* lea */
5685         modrm = x86_ldub_code(env, s);
5686         mod = (modrm >> 6) & 3;
5687         if (mod == 3)
5688             goto illegal_op;
5689         reg = ((modrm >> 3) & 7) | REX_R(s);
5690         {
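             /* LEA only computes the effective address: no segment base
                is applied (-1, -1) and no memory access is generated. */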
5691             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5692             TCGv ea = gen_lea_modrm_1(s, a);
5693             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5694             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5695         }
5696         break;
5697 
5698     case 0xa0: /* mov EAX, Ov */
5699     case 0xa1:
5700     case 0xa2: /* mov Ov, EAX */
5701     case 0xa3:
5702         {
5703             target_ulong offset_addr;
5704 
5705             ot = mo_b_d(b, dflag);
5706             switch (s->aflag) {
5707 #ifdef TARGET_X86_64
5708             case MO_64:
5709                 offset_addr = x86_ldq_code(env, s);
5710                 break;
5711 #endif
5712             default:
5713                 offset_addr = insn_get(env, s, s->aflag);
5714                 break;
5715             }
5716             tcg_gen_movi_tl(s->A0, offset_addr);
5717             gen_add_A0_ds_seg(s);
5718             if ((b & 2) == 0) {
5719                 gen_op_ld_v(s, ot, s->T0, s->A0);
5720                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5721             } else {
5722                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5723                 gen_op_st_v(s, ot, s->T0, s->A0);
5724             }
5725         }
5726         break;
5727     case 0xd7: /* xlat */
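         /* XLAT: AL = [seg:(E)BX + unsigned AL]; default segment DS */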
5728         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5729         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5730         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5731         gen_extu(s->aflag, s->A0);
5732         gen_add_A0_ds_seg(s);
5733         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5734         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5735         break;
5736     case 0xb0 ... 0xb7: /* mov R, Ib */
5737         val = insn_get(env, s, MO_8);
5738         tcg_gen_movi_tl(s->T0, val);
5739         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5740         break;
5741     case 0xb8 ... 0xbf: /* mov R, Iv */
5742 #ifdef TARGET_X86_64
5743         if (dflag == MO_64) {
5744             uint64_t tmp;
5745             /* 64 bit case */
5746             tmp = x86_ldq_code(env, s);
5747             reg = (b & 7) | REX_B(s);
5748             tcg_gen_movi_tl(s->T0, tmp);
5749             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5750         } else
5751 #endif
5752         {
5753             ot = dflag;
5754             val = insn_get(env, s, ot);
5755             reg = (b & 7) | REX_B(s);
5756             tcg_gen_movi_tl(s->T0, val);
5757             gen_op_mov_reg_v(s, ot, reg, s->T0);
5758         }
5759         break;
5760 
5761     case 0x91 ... 0x97: /* xchg R, EAX */
5762     do_xchg_reg_eax:
5763         ot = dflag;
5764         reg = (b & 7) | REX_B(s);
5765         rm = R_EAX;
5766         goto do_xchg_reg;
5767     case 0x86:
5768     case 0x87: /* xchg Ev, Gv */
5769         ot = mo_b_d(b, dflag);
5770         modrm = x86_ldub_code(env, s);
5771         reg = ((modrm >> 3) & 7) | REX_R(s);
5772         mod = (modrm >> 6) & 3;
5773         if (mod == 3) {
5774             rm = (modrm & 7) | REX_B(s);
5775         do_xchg_reg:
5776             gen_op_mov_v_reg(s, ot, s->T0, reg);
5777             gen_op_mov_v_reg(s, ot, s->T1, rm);
5778             gen_op_mov_reg_v(s, ot, rm, s->T0);
5779             gen_op_mov_reg_v(s, ot, reg, s->T1);
5780         } else {
5781             gen_lea_modrm(env, s, modrm);
5782             gen_op_mov_v_reg(s, ot, s->T0, reg);
5783             /* for xchg, lock is implicit */
5784             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5785                                    s->mem_index, ot | MO_LE);
5786             gen_op_mov_reg_v(s, ot, reg, s->T1);
5787         }
5788         break;
5789     case 0xc4: /* les Gv */
5790         /* In CODE64 this is VEX3; see above.  */
5791         op = R_ES;
5792         goto do_lxx;
5793     case 0xc5: /* lds Gv */
5794         /* In CODE64 this is VEX2; see above.  */
5795         op = R_DS;
5796         goto do_lxx;
5797     case 0x1b2: /* lss Gv */
5798         op = R_SS;
5799         goto do_lxx;
5800     case 0x1b4: /* lfs Gv */
5801         op = R_FS;
5802         goto do_lxx;
5803     case 0x1b5: /* lgs Gv */
5804         op = R_GS;
5805     do_lxx:
5806         ot = dflag != MO_16 ? MO_32 : MO_16;
5807         modrm = x86_ldub_code(env, s);
5808         reg = ((modrm >> 3) & 7) | REX_R(s);
5809         mod = (modrm >> 6) & 3;
5810         if (mod == 3)
5811             goto illegal_op;
5812         gen_lea_modrm(env, s, modrm);
5813         gen_op_ld_v(s, ot, s->T1, s->A0);
5814         gen_add_A0_im(s, 1 << ot);
5815         /* load the segment first to handle exceptions properly */
5816         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5817         gen_movl_seg_T0(s, op);
5818         /* then put the data */
5819         gen_op_mov_reg_v(s, ot, reg, s->T1);
5820         if (s->base.is_jmp) {
5821             gen_jmp_im(s, s->pc - s->cs_base);
5822             gen_eob(s);
5823         }
5824         break;
5825 
5826         /************************/
5827         /* shifts */
5828     case 0xc0:
5829     case 0xc1:
5830         /* shift Ev,Ib */
5831         shift = 2;
5832     grp2:
5833         {
5834             ot = mo_b_d(b, dflag);
5835             modrm = x86_ldub_code(env, s);
5836             mod = (modrm >> 6) & 3;
5837             op = (modrm >> 3) & 7;
5838 
5839             if (mod != 3) {
5840                 if (shift == 2) {
5841                     s->rip_offset = 1;
5842                 }
5843                 gen_lea_modrm(env, s, modrm);
5844                 opreg = OR_TMP0;
5845             } else {
5846                 opreg = (modrm & 7) | REX_B(s);
5847             }
5848 
5849             /* count: CL if shift == 0, 1 if shift == 1, else imm8 */
5850             if (shift == 0) {
5851                 gen_shift(s, op, ot, opreg, OR_ECX);
5852             } else {
5853                 if (shift == 2) {
5854                     shift = x86_ldub_code(env, s);
5855                 }
5856                 gen_shifti(s, op, ot, opreg, shift);
5857             }
5858         }
5859         break;
5860     case 0xd0:
5861     case 0xd1:
5862         /* shift Ev,1 */
5863         shift = 1;
5864         goto grp2;
5865     case 0xd2:
5866     case 0xd3:
5867         /* shift Ev,cl */
5868         shift = 0;
5869         goto grp2;
5870 
5871     case 0x1a4: /* shld imm */
5872         op = 0;
5873         shift = 1;
5874         goto do_shiftd;
5875     case 0x1a5: /* shld cl */
5876         op = 0;
5877         shift = 0;
5878         goto do_shiftd;
5879     case 0x1ac: /* shrd imm */
5880         op = 1;
5881         shift = 1;
5882         goto do_shiftd;
5883     case 0x1ad: /* shrd cl */
5884         op = 1;
5885         shift = 0;
5886     do_shiftd:
5887         ot = dflag;
5888         modrm = x86_ldub_code(env, s);
5889         mod = (modrm >> 6) & 3;
5890         rm = (modrm & 7) | REX_B(s);
5891         reg = ((modrm >> 3) & 7) | REX_R(s);
5892         if (mod != 3) {
5893             gen_lea_modrm(env, s, modrm);
5894             opreg = OR_TMP0;
5895         } else {
5896             opreg = rm;
5897         }
5898         gen_op_mov_v_reg(s, ot, s->T1, reg);
5899 
5900         if (shift) {
5901             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5902             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5903             tcg_temp_free(imm);
5904         } else {
5905             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5906         }
5907         break;
5908 
5909         /************************/
5910         /* floats */
5911     case 0xd8 ... 0xdf:
5912         {
5913             bool update_fip = true;
5914 
5915             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5916                 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5917                 /* XXX: what to do if illegal op? */
5918                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5919                 break;
5920             }
5921             modrm = x86_ldub_code(env, s);
5922             mod = (modrm >> 6) & 3;
5923             rm = modrm & 7;
5924             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5925             if (mod != 3) {
5926                 /* memory op */
5927                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5928                 TCGv ea = gen_lea_modrm_1(s, a);
5929                 TCGv last_addr = tcg_temp_new();
5930                 bool update_fdp = true;
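                     /* Remember the raw effective address so the FPU
                        data pointer (FDP) can be updated after the
                        access. */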
5931 
5932                 tcg_gen_mov_tl(last_addr, ea);
5933                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5934 
5935                 switch (op) {
5936                 case 0x00 ... 0x07: /* fxxxs */
5937                 case 0x10 ... 0x17: /* fixxxl */
5938                 case 0x20 ... 0x27: /* fxxxl */
5939                 case 0x30 ... 0x37: /* fixxx */
5940                     {
5941                         int op1;
5942                         op1 = op & 7;
5943 
5944                         switch (op >> 4) {
5945                         case 0:
5946                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5947                                                 s->mem_index, MO_LEUL);
5948                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5949                             break;
5950                         case 1:
5951                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5952                                                 s->mem_index, MO_LEUL);
5953                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5954                             break;
5955                         case 2:
5956                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5957                                                 s->mem_index, MO_LEQ);
5958                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5959                             break;
5960                         case 3:
5961                         default:
5962                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5963                                                 s->mem_index, MO_LESW);
5964                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5965                             break;
5966                         }
5967 
5968                         gen_helper_fp_arith_ST0_FT0(op1);
5969                         if (op1 == 3) {
5970                             /* fcomp needs pop */
5971                             gen_helper_fpop(cpu_env);
5972                         }
5973                     }
5974                     break;
5975                 case 0x08: /* flds */
5976                 case 0x0a: /* fsts */
5977                 case 0x0b: /* fstps */
5978                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5979                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5980                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5981                     switch (op & 7) {
5982                     case 0:
5983                         switch (op >> 4) {
5984                         case 0:
5985                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5986                                                 s->mem_index, MO_LEUL);
5987                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5988                             break;
5989                         case 1:
5990                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5991                                                 s->mem_index, MO_LEUL);
5992                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5993                             break;
5994                         case 2:
5995                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5996                                                 s->mem_index, MO_LEQ);
5997                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5998                             break;
5999                         case 3:
6000                         default:
6001                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6002                                                 s->mem_index, MO_LESW);
6003                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6004                             break;
6005                         }
6006                         break;
6007                     case 1:
6008                         /* XXX: the corresponding CPUID bit (SSE3, for fisttp) must be tested! */
6009                         switch (op >> 4) {
6010                         case 1:
6011                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6012                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6013                                                 s->mem_index, MO_LEUL);
6014                             break;
6015                         case 2:
6016                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6017                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6018                                                 s->mem_index, MO_LEQ);
6019                             break;
6020                         case 3:
6021                         default:
6022                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6023                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6024                                                 s->mem_index, MO_LEUW);
6025                             break;
6026                         }
6027                         gen_helper_fpop(cpu_env);
6028                         break;
6029                     default:
6030                         switch (op >> 4) {
6031                         case 0:
6032                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6033                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6034                                                 s->mem_index, MO_LEUL);
6035                             break;
6036                         case 1:
6037                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6038                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6039                                                 s->mem_index, MO_LEUL);
6040                             break;
6041                         case 2:
6042                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6043                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6044                                                 s->mem_index, MO_LEQ);
6045                             break;
6046                         case 3:
6047                         default:
6048                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6049                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6050                                                 s->mem_index, MO_LEUW);
6051                             break;
6052                         }
6053                         if ((op & 7) == 3) {
6054                             gen_helper_fpop(cpu_env);
6055                         }
6056                         break;
6057                     }
6058                     break;
6059                 case 0x0c: /* fldenv mem */
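                     /* dflag - 1 selects the env layout: 0 is the 16-bit,
                        non-zero the 32-bit fstenv/fldenv format.  */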
6060                     gen_helper_fldenv(cpu_env, s->A0,
6061                                       tcg_const_i32(dflag - 1));
6062                     update_fip = update_fdp = false;
6063                     break;
6064                 case 0x0d: /* fldcw mem */
6065                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6066                                         s->mem_index, MO_LEUW);
6067                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6068                     update_fip = update_fdp = false;
6069                     break;
6070                 case 0x0e: /* fnstenv mem */
6071                     gen_helper_fstenv(cpu_env, s->A0,
6072                                       tcg_const_i32(dflag - 1));
6073                     update_fip = update_fdp = false;
6074                     break;
6075                 case 0x0f: /* fnstcw mem */
6076                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6077                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6078                                         s->mem_index, MO_LEUW);
6079                     update_fip = update_fdp = false;
6080                     break;
6081                 case 0x1d: /* fldt mem */
6082                     gen_helper_fldt_ST0(cpu_env, s->A0);
6083                     break;
6084                 case 0x1f: /* fstpt mem */
6085                     gen_helper_fstt_ST0(cpu_env, s->A0);
6086                     gen_helper_fpop(cpu_env);
6087                     break;
6088                 case 0x2c: /* frstor mem */
6089                     gen_helper_frstor(cpu_env, s->A0,
6090                                       tcg_const_i32(dflag - 1));
6091                     update_fip = update_fdp = false;
6092                     break;
6093                 case 0x2e: /* fnsave mem */
6094                     gen_helper_fsave(cpu_env, s->A0,
6095                                      tcg_const_i32(dflag - 1));
6096                     update_fip = update_fdp = false;
6097                     break;
6098                 case 0x2f: /* fnstsw mem */
6099                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6100                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6101                                         s->mem_index, MO_LEUW);
6102                     update_fip = update_fdp = false;
6103                     break;
6104                 case 0x3c: /* fbld */
6105                     gen_helper_fbld_ST0(cpu_env, s->A0);
6106                     break;
6107                 case 0x3e: /* fbstp */
6108                     gen_helper_fbst_ST0(cpu_env, s->A0);
6109                     gen_helper_fpop(cpu_env);
6110                     break;
6111                 case 0x3d: /* fildll */
6112                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6113                                         s->mem_index, MO_LEQ);
6114                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6115                     break;
6116                 case 0x3f: /* fistpll */
6117                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6118                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6119                                         s->mem_index, MO_LEQ);
6120                     gen_helper_fpop(cpu_env);
6121                     break;
6122                 default:
6123                     goto unknown_op;
6124                 }
6125 
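                 /* Non-control insns with a memory operand also record the
                    operand's segment selector and offset in FDS/FDP, which
                    fstenv/fsave later report.  */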
6126                 if (update_fdp) {
6127                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6128 
6129                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6130                                    offsetof(CPUX86State,
6131                                             segs[last_seg].selector));
6132                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6133                                      offsetof(CPUX86State, fpds));
6134                     tcg_gen_st_tl(last_addr, cpu_env,
6135                                   offsetof(CPUX86State, fpdp));
6136                 }
6137                 tcg_temp_free(last_addr);
6138             } else {
6139                 /* register float ops */
6140                 opreg = rm;
6141 
6142                 switch (op) {
6143                 case 0x08: /* fld sti */
6144                     gen_helper_fpush(cpu_env);
6145                     gen_helper_fmov_ST0_STN(cpu_env,
6146                                             tcg_const_i32((opreg + 1) & 7));
6147                     break;
6148                 case 0x09: /* fxchg sti */
6149                 case 0x29: /* fxchg4 sti, undocumented op */
6150                 case 0x39: /* fxchg7 sti, undocumented op */
6151                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6152                     break;
6153                 case 0x0a: /* grp d9/2 */
6154                     switch (rm) {
6155                     case 0: /* fnop */
6156                         /* check exceptions (FreeBSD FPU probe) */
6157                         gen_helper_fwait(cpu_env);
6158                         update_fip = false;
6159                         break;
6160                     default:
6161                         goto unknown_op;
6162                     }
6163                     break;
6164                 case 0x0c: /* grp d9/4 */
6165                     switch (rm) {
6166                     case 0: /* fchs */
6167                         gen_helper_fchs_ST0(cpu_env);
6168                         break;
6169                     case 1: /* fabs */
6170                         gen_helper_fabs_ST0(cpu_env);
6171                         break;
6172                     case 4: /* ftst */
6173                         gen_helper_fldz_FT0(cpu_env);
6174                         gen_helper_fcom_ST0_FT0(cpu_env);
6175                         break;
6176                     case 5: /* fxam */
6177                         gen_helper_fxam_ST0(cpu_env);
6178                         break;
6179                     default:
6180                         goto unknown_op;
6181                     }
6182                     break;
6183                 case 0x0d: /* grp d9/5 */
6184                     {
6185                         switch (rm) {
6186                         case 0:
6187                             gen_helper_fpush(cpu_env);
6188                             gen_helper_fld1_ST0(cpu_env);
6189                             break;
6190                         case 1:
6191                             gen_helper_fpush(cpu_env);
6192                             gen_helper_fldl2t_ST0(cpu_env);
6193                             break;
6194                         case 2:
6195                             gen_helper_fpush(cpu_env);
6196                             gen_helper_fldl2e_ST0(cpu_env);
6197                             break;
6198                         case 3:
6199                             gen_helper_fpush(cpu_env);
6200                             gen_helper_fldpi_ST0(cpu_env);
6201                             break;
6202                         case 4:
6203                             gen_helper_fpush(cpu_env);
6204                             gen_helper_fldlg2_ST0(cpu_env);
6205                             break;
6206                         case 5:
6207                             gen_helper_fpush(cpu_env);
6208                             gen_helper_fldln2_ST0(cpu_env);
6209                             break;
6210                         case 6:
6211                             gen_helper_fpush(cpu_env);
6212                             gen_helper_fldz_ST0(cpu_env);
6213                             break;
6214                         default:
6215                             goto unknown_op;
6216                         }
6217                     }
6218                     break;
6219                 case 0x0e: /* grp d9/6 */
6220                     switch (rm) {
6221                     case 0: /* f2xm1 */
6222                         gen_helper_f2xm1(cpu_env);
6223                         break;
6224                     case 1: /* fyl2x */
6225                         gen_helper_fyl2x(cpu_env);
6226                         break;
6227                     case 2: /* fptan */
6228                         gen_helper_fptan(cpu_env);
6229                         break;
6230                     case 3: /* fpatan */
6231                         gen_helper_fpatan(cpu_env);
6232                         break;
6233                     case 4: /* fxtract */
6234                         gen_helper_fxtract(cpu_env);
6235                         break;
6236                     case 5: /* fprem1 */
6237                         gen_helper_fprem1(cpu_env);
6238                         break;
6239                     case 6: /* fdecstp */
6240                         gen_helper_fdecstp(cpu_env);
6241                         break;
6242                     default:
6243                     case 7: /* fincstp */
6244                         gen_helper_fincstp(cpu_env);
6245                         break;
6246                     }
6247                     break;
6248                 case 0x0f: /* grp d9/7 */
6249                     switch (rm) {
6250                     case 0: /* fprem */
6251                         gen_helper_fprem(cpu_env);
6252                         break;
6253                     case 1: /* fyl2xp1 */
6254                         gen_helper_fyl2xp1(cpu_env);
6255                         break;
6256                     case 2: /* fsqrt */
6257                         gen_helper_fsqrt(cpu_env);
6258                         break;
6259                     case 3: /* fsincos */
6260                         gen_helper_fsincos(cpu_env);
6261                         break;
6262                     case 4: /* frndint */
6263                         gen_helper_frndint(cpu_env);
6264                         break;
6265                     case 5: /* fscale */
6266                         gen_helper_fscale(cpu_env);
6267                         break;
6268                     case 6: /* fsin */
6269                         gen_helper_fsin(cpu_env);
6270                         break;
6271                     default:
6272                     case 7: /* fcos */
6273                         gen_helper_fcos(cpu_env);
6274                         break;
6275                     }
6276                     break;
6277                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6278                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6279                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6280                     {
6281                         int op1;
6282 
6283                         op1 = op & 7;
6284                         if (op >= 0x20) {
6285                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6286                             if (op >= 0x30) {
6287                                 gen_helper_fpop(cpu_env);
6288                             }
6289                         } else {
6290                             gen_helper_fmov_FT0_STN(cpu_env,
6291                                                     tcg_const_i32(opreg));
6292                             gen_helper_fp_arith_ST0_FT0(op1);
6293                         }
6294                     }
6295                     break;
6296                 case 0x02: /* fcom */
6297                 case 0x22: /* fcom2, undocumented op */
6298                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6299                     gen_helper_fcom_ST0_FT0(cpu_env);
6300                     break;
6301                 case 0x03: /* fcomp */
6302                 case 0x23: /* fcomp3, undocumented op */
6303                 case 0x32: /* fcomp5, undocumented op */
6304                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6305                     gen_helper_fcom_ST0_FT0(cpu_env);
6306                     gen_helper_fpop(cpu_env);
6307                     break;
6308                 case 0x15: /* da/5 */
6309                     switch (rm) {
6310                     case 1: /* fucompp */
6311                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6312                         gen_helper_fucom_ST0_FT0(cpu_env);
6313                         gen_helper_fpop(cpu_env);
6314                         gen_helper_fpop(cpu_env);
6315                         break;
6316                     default:
6317                         goto unknown_op;
6318                     }
6319                     break;
6320                 case 0x1c:
6321                     switch (rm) {
6322                     case 0: /* feni (287 only, just do nop here) */
6323                         break;
6324                     case 1: /* fdisi (287 only, just do nop here) */
6325                         break;
6326                     case 2: /* fclex */
6327                         gen_helper_fclex(cpu_env);
6328                         update_fip = false;
6329                         break;
6330                     case 3: /* fninit */
6331                         gen_helper_fninit(cpu_env);
6332                         update_fip = false;
6333                         break;
6334                     case 4: /* fsetpm (287 only, just do nop here) */
6335                         break;
6336                     default:
6337                         goto unknown_op;
6338                     }
6339                     break;
6340                 case 0x1d: /* fucomi */
6341                     if (!(s->cpuid_features & CPUID_CMOV)) {
6342                         goto illegal_op;
6343                     }
6344                     gen_update_cc_op(s);
6345                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6346                     gen_helper_fucomi_ST0_FT0(cpu_env);
6347                     set_cc_op(s, CC_OP_EFLAGS);
6348                     break;
6349                 case 0x1e: /* fcomi */
6350                     if (!(s->cpuid_features & CPUID_CMOV)) {
6351                         goto illegal_op;
6352                     }
6353                     gen_update_cc_op(s);
6354                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6355                     gen_helper_fcomi_ST0_FT0(cpu_env);
6356                     set_cc_op(s, CC_OP_EFLAGS);
6357                     break;
6358                 case 0x28: /* ffree sti */
6359                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6360                     break;
6361                 case 0x2a: /* fst sti */
6362                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6363                     break;
6364                 case 0x2b: /* fstp sti */
6365                 case 0x0b: /* fstp1 sti, undocumented op */
6366                 case 0x3a: /* fstp8 sti, undocumented op */
6367                 case 0x3b: /* fstp9 sti, undocumented op */
6368                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6369                     gen_helper_fpop(cpu_env);
6370                     break;
6371                 case 0x2c: /* fucom st(i) */
6372                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6373                     gen_helper_fucom_ST0_FT0(cpu_env);
6374                     break;
6375                 case 0x2d: /* fucomp st(i) */
6376                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6377                     gen_helper_fucom_ST0_FT0(cpu_env);
6378                     gen_helper_fpop(cpu_env);
6379                     break;
6380                 case 0x33: /* de/3 */
6381                     switch (rm) {
6382                     case 1: /* fcompp */
6383                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6384                         gen_helper_fcom_ST0_FT0(cpu_env);
6385                         gen_helper_fpop(cpu_env);
6386                         gen_helper_fpop(cpu_env);
6387                         break;
6388                     default:
6389                         goto unknown_op;
6390                     }
6391                     break;
6392                 case 0x38: /* ffreep sti, undocumented op */
6393                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6394                     gen_helper_fpop(cpu_env);
6395                     break;
6396                 case 0x3c: /* df/4 */
6397                     switch (rm) {
6398                     case 0:
6399                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6400                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6401                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6402                         break;
6403                     default:
6404                         goto unknown_op;
6405                     }
6406                     break;
6407                 case 0x3d: /* fucomip */
6408                     if (!(s->cpuid_features & CPUID_CMOV)) {
6409                         goto illegal_op;
6410                     }
6411                     gen_update_cc_op(s);
6412                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6413                     gen_helper_fucomi_ST0_FT0(cpu_env);
6414                     gen_helper_fpop(cpu_env);
6415                     set_cc_op(s, CC_OP_EFLAGS);
6416                     break;
6417                 case 0x3e: /* fcomip */
6418                     if (!(s->cpuid_features & CPUID_CMOV)) {
6419                         goto illegal_op;
6420                     }
6421                     gen_update_cc_op(s);
6422                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6423                     gen_helper_fcomi_ST0_FT0(cpu_env);
6424                     gen_helper_fpop(cpu_env);
6425                     set_cc_op(s, CC_OP_EFLAGS);
6426                     break;
6427                 case 0x10 ... 0x13: /* fcmovxx */
6428                 case 0x18 ... 0x1b:
6429                     {
6430                         int op1;
6431                         TCGLabel *l1;
6432                         static const uint8_t fcmov_cc[8] = {
6433                             (JCC_B << 1),
6434                             (JCC_Z << 1),
6435                             (JCC_BE << 1),
6436                             (JCC_P << 1),
6437                         };
6438 
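                         /* fcmov_cc maps the low two opcode bits to a base
                            condition; bit 3 of op picks the DA (move if cc)
                            vs DB (move if !cc) form, and the XOR inverts the
                            test so the branch skips the fmov when no move
                            should happen.  */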
6439                         if (!(s->cpuid_features & CPUID_CMOV)) {
6440                             goto illegal_op;
6441                         }
6442                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6443                         l1 = gen_new_label();
6444                         gen_jcc1_noeob(s, op1, l1);
6445                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6446                         gen_set_label(l1);
6447                     }
6448                     break;
6449                 default:
6450                     goto unknown_op;
6451                 }
6452             }
6453 
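             /* Non-control x87 insns record their own CS selector and EIP
                in FPCS/FPIP, the "last instruction pointer" reported by
                fstenv/fsave.  */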
6454             if (update_fip) {
6455                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6456                                offsetof(CPUX86State, segs[R_CS].selector));
6457                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6458                                  offsetof(CPUX86State, fpcs));
6459                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6460                               cpu_env, offsetof(CPUX86State, fpip));
6461             }
6462         }
6463         break;
6464         /************************/
6465         /* string ops */
6466 
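         /* For all string ops, bit 0 of the opcode selects byte vs
            dflag-sized elements (mo_b_d), and a REP/REPNZ prefix turns
            the single op into an (E)CX-counted loop via the gen_repz_*
            helpers.  */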
6467     case 0xa4: /* movsS */
6468     case 0xa5:
6469         ot = mo_b_d(b, dflag);
6470         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6471             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6472         } else {
6473             gen_movs(s, ot);
6474         }
6475         break;
6476 
6477     case 0xaa: /* stosS */
6478     case 0xab:
6479         ot = mo_b_d(b, dflag);
6480         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6481             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6482         } else {
6483             gen_stos(s, ot);
6484         }
6485         break;
6486     case 0xac: /* lodsS */
6487     case 0xad:
6488         ot = mo_b_d(b, dflag);
6489         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6490             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6491         } else {
6492             gen_lods(s, ot);
6493         }
6494         break;
6495     case 0xae: /* scasS */
6496     case 0xaf:
6497         ot = mo_b_d(b, dflag);
6498         if (prefixes & PREFIX_REPNZ) {
6499             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6500         } else if (prefixes & PREFIX_REPZ) {
6501             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6502         } else {
6503             gen_scas(s, ot);
6504         }
6505         break;
6506 
6507     case 0xa6: /* cmpsS */
6508     case 0xa7:
6509         ot = mo_b_d(b, dflag);
6510         if (prefixes & PREFIX_REPNZ) {
6511             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6512         } else if (prefixes & PREFIX_REPZ) {
6513             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6514         } else {
6515             gen_cmps(s, ot);
6516         }
6517         break;
6518     case 0x6c: /* insS */
6519     case 0x6d:
6520         ot = mo_b_d32(b, dflag);
6521         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6522         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6523         if (!gen_check_io(s, ot, s->tmp2_i32,
6524                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6525             break;
6526         }
6527         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6528             gen_io_start();
6529         }
6530         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6531             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6532             /* jump generated by gen_repz_ins */
6533         } else {
6534             gen_ins(s, ot);
6535             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6536                 gen_jmp(s, s->pc - s->cs_base);
6537             }
6538         }
6539         break;
6540     case 0x6e: /* outsS */
6541     case 0x6f:
6542         ot = mo_b_d32(b, dflag);
6543         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6544         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6545         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6546             break;
6547         }
6548         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6549             gen_io_start();
6550         }
6551         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6552             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6553             /* jump generated by gen_repz_outs */
6554         } else {
6555             gen_outs(s, ot);
6556             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6557                 gen_jmp(s, s->pc - s->cs_base);
6558             }
6559         }
6560         break;
6561 
6562         /************************/
6563         /* port I/O */
6564 
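         /* gen_check_io() validates the access against the TSS I/O
            permission bitmap and the SVM IOIO intercept before touching
            the port; under icount the actual I/O is bracketed by
            gen_io_start().  */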
6565     case 0xe4:
6566     case 0xe5:
6567         ot = mo_b_d32(b, dflag);
6568         val = x86_ldub_code(env, s);
6569         tcg_gen_movi_i32(s->tmp2_i32, val);
6570         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6571             break;
6572         }
6573         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6574             gen_io_start();
6575         }
6576         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6577         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6578         gen_bpt_io(s, s->tmp2_i32, ot);
6579         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6580             gen_jmp(s, s->pc - s->cs_base);
6581         }
6582         break;
6583     case 0xe6:
6584     case 0xe7:
6585         ot = mo_b_d32(b, dflag);
6586         val = x86_ldub_code(env, s);
6587         tcg_gen_movi_i32(s->tmp2_i32, val);
6588         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6589             break;
6590         }
6591         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6592             gen_io_start();
6593         }
6594         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6595         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6596         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6597         gen_bpt_io(s, s->tmp2_i32, ot);
6598         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6599             gen_jmp(s, s->pc - s->cs_base);
6600         }
6601         break;
6602     case 0xec:
6603     case 0xed:
6604         ot = mo_b_d32(b, dflag);
6605         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6606         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6607         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6608             break;
6609         }
6610         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6611             gen_io_start();
6612         }
6613         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6614         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6615         gen_bpt_io(s, s->tmp2_i32, ot);
6616         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6617             gen_jmp(s, s->pc - s->cs_base);
6618         }
6619         break;
6620     case 0xee:
6621     case 0xef:
6622         ot = mo_b_d32(b, dflag);
6623         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6624         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6625         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6626             break;
6627         }
6628         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6629             gen_io_start();
6630         }
6631         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6632         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6633         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6634         gen_bpt_io(s, s->tmp2_i32, ot);
6635         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6636             gen_jmp(s, s->pc - s->cs_base);
6637         }
6638         break;
6639 
6640         /************************/
6641         /* control */
6642     case 0xc2: /* ret im */
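         /* RET imm16: pop the return address, then release imm16 extra
            bytes of callee-cleaned argument stack.  */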
6643         val = x86_ldsw_code(env, s);
6644         ot = gen_pop_T0(s);
6645         gen_stack_update(s, val + (1 << ot));
6646         /* Note that gen_pop_T0 uses a zero-extending load.  */
6647         gen_op_jmp_v(s->T0);
6648         gen_bnd_jmp(s);
6649         gen_jr(s, s->T0);
6650         break;
6651     case 0xc3: /* ret */
6652         ot = gen_pop_T0(s);
6653         gen_pop_update(s, ot);
6654         /* Note that gen_pop_T0 uses a zero-extending load.  */
6655         gen_op_jmp_v(s->T0);
6656         gen_bnd_jmp(s);
6657         gen_jr(s, s->T0);
6658         break;
6659     case 0xca: /* lret im */
6660         val = x86_ldsw_code(env, s);
6661     do_lret:
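         /* In protected mode the far return must go through the helper so
            the full privilege and selector checks run; real and vm86 mode
            simply pop CS:EIP off the stack.  */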
6662         if (PE(s) && !VM86(s)) {
6663             gen_update_cc_op(s);
6664             gen_jmp_im(s, pc_start - s->cs_base);
6665             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6666                                       tcg_const_i32(val));
6667         } else {
6668             gen_stack_A0(s);
6669             /* pop offset */
6670             gen_op_ld_v(s, dflag, s->T0, s->A0);
6671             /* NOTE: it is safe to keep EIP updated here even if an
6672                exception is raised later */
6673             gen_op_jmp_v(s->T0);
6674             /* pop selector */
6675             gen_add_A0_im(s, 1 << dflag);
6676             gen_op_ld_v(s, dflag, s->T0, s->A0);
6677             gen_op_movl_seg_T0_vm(s, R_CS);
6678             /* add stack offset */
6679             gen_stack_update(s, val + (2 << dflag));
6680         }
6681         gen_eob(s);
6682         break;
6683     case 0xcb: /* lret */
6684         val = 0;
6685         goto do_lret;
6686     case 0xcf: /* iret */
6687         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6688         if (!PE(s) || VM86(s)) {
6689             /* real mode or vm86 mode */
6690             if (!check_vm86_iopl(s)) {
6691                 break;
6692             }
6693             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6694         } else {
6695             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6696                                       tcg_const_i32(s->pc - s->cs_base));
6697         }
6698         set_cc_op(s, CC_OP_EFLAGS);
6699         gen_eob(s);
6700         break;
6701     case 0xe8: /* call im */
6702         {
6703             if (dflag != MO_16) {
6704                 tval = (int32_t)insn_get(env, s, MO_32);
6705             } else {
6706                 tval = (int16_t)insn_get(env, s, MO_16);
6707             }
6708             next_eip = s->pc - s->cs_base;
6709             tval += next_eip;
6710             if (dflag == MO_16) {
6711                 tval &= 0xffff;
6712             } else if (!CODE64(s)) {
6713                 tval &= 0xffffffff;
6714             }
6715             tcg_gen_movi_tl(s->T0, next_eip);
6716             gen_push_v(s, s->T0);
6717             gen_bnd_jmp(s);
6718             gen_jmp(s, tval);
6719         }
6720         break;
6721     case 0x9a: /* lcall im */
6722         {
6723             unsigned int selector, offset;
6724 
6725             if (CODE64(s))
6726                 goto illegal_op;
6727             ot = dflag;
6728             offset = insn_get(env, s, ot);
6729             selector = insn_get(env, s, MO_16);
6730 
6731             tcg_gen_movi_tl(s->T0, selector);
6732             tcg_gen_movi_tl(s->T1, offset);
6733         }
6734         goto do_lcall;
6735     case 0xe9: /* jmp im */
6736         if (dflag != MO_16) {
6737             tval = (int32_t)insn_get(env, s, MO_32);
6738         } else {
6739             tval = (int16_t)insn_get(env, s, MO_16);
6740         }
6741         tval += s->pc - s->cs_base;
6742         if (dflag == MO_16) {
6743             tval &= 0xffff;
6744         } else if (!CODE64(s)) {
6745             tval &= 0xffffffff;
6746         }
6747         gen_bnd_jmp(s);
6748         gen_jmp(s, tval);
6749         break;
6750     case 0xea: /* ljmp im */
6751         {
6752             unsigned int selector, offset;
6753 
6754             if (CODE64(s))
6755                 goto illegal_op;
6756             ot = dflag;
6757             offset = insn_get(env, s, ot);
6758             selector = insn_get(env, s, MO_16);
6759 
6760             tcg_gen_movi_tl(s->T0, selector);
6761             tcg_gen_movi_tl(s->T1, offset);
6762         }
6763         goto do_ljmp;
6764     case 0xeb: /* jmp Jb */
6765         tval = (int8_t)insn_get(env, s, MO_8);
6766         tval += s->pc - s->cs_base;
6767         if (dflag == MO_16) {
6768             tval &= 0xffff;
6769         }
6770         gen_jmp(s, tval);
6771         break;
6772     case 0x70 ... 0x7f: /* jcc Jb */
6773         tval = (int8_t)insn_get(env, s, MO_8);
6774         goto do_jcc;
6775     case 0x180 ... 0x18f: /* jcc Jv */
6776         if (dflag != MO_16) {
6777             tval = (int32_t)insn_get(env, s, MO_32);
6778         } else {
6779             tval = (int16_t)insn_get(env, s, MO_16);
6780         }
6781     do_jcc:
6782         next_eip = s->pc - s->cs_base;
6783         tval += next_eip;
6784         if (dflag == MO_16) {
6785             tval &= 0xffff;
6786         }
6787         gen_bnd_jmp(s);
6788         gen_jcc(s, b, tval, next_eip);
6789         break;
6790 
6791     case 0x190 ... 0x19f: /* setcc Gv */
6792         modrm = x86_ldub_code(env, s);
6793         gen_setcc1(s, b, s->T0);
6794         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6795         break;
6796     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6797         if (!(s->cpuid_features & CPUID_CMOV)) {
6798             goto illegal_op;
6799         }
6800         ot = dflag;
6801         modrm = x86_ldub_code(env, s);
6802         reg = ((modrm >> 3) & 7) | REX_R(s);
6803         gen_cmovcc1(env, s, ot, b, modrm, reg);
6804         break;
6805 
6806         /************************/
6807         /* flags */
6808     case 0x9c: /* pushf */
6809         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6810         if (check_vm86_iopl(s)) {
6811             gen_update_cc_op(s);
6812             gen_helper_read_eflags(s->T0, cpu_env);
6813             gen_push_v(s, s->T0);
6814         }
6815         break;
6816     case 0x9d: /* popf */
6817         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6818         if (check_vm86_iopl(s)) {
6819             ot = gen_pop_T0(s);
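             /* The set of writable EFLAGS bits depends on privilege:
                CPL 0 may also change IF and IOPL, CPL <= IOPL may still
                change IF, and everyone may change TF/AC/ID/NT; MO_16
                writes mask off the high half.  */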
6820             if (CPL(s) == 0) {
6821                 if (dflag != MO_16) {
6822                     gen_helper_write_eflags(cpu_env, s->T0,
6823                                             tcg_const_i32((TF_MASK | AC_MASK |
6824                                                            ID_MASK | NT_MASK |
6825                                                            IF_MASK |
6826                                                            IOPL_MASK)));
6827                 } else {
6828                     gen_helper_write_eflags(cpu_env, s->T0,
6829                                             tcg_const_i32((TF_MASK | AC_MASK |
6830                                                            ID_MASK | NT_MASK |
6831                                                            IF_MASK | IOPL_MASK)
6832                                                           & 0xffff));
6833                 }
6834             } else {
6835                 if (CPL(s) <= IOPL(s)) {
6836                     if (dflag != MO_16) {
6837                         gen_helper_write_eflags(cpu_env, s->T0,
6838                                                 tcg_const_i32((TF_MASK |
6839                                                                AC_MASK |
6840                                                                ID_MASK |
6841                                                                NT_MASK |
6842                                                                IF_MASK)));
6843                     } else {
6844                         gen_helper_write_eflags(cpu_env, s->T0,
6845                                                 tcg_const_i32((TF_MASK |
6846                                                                AC_MASK |
6847                                                                ID_MASK |
6848                                                                NT_MASK |
6849                                                                IF_MASK)
6850                                                               & 0xffff));
6851                     }
6852                 } else {
6853                     if (dflag != MO_16) {
6854                         gen_helper_write_eflags(cpu_env, s->T0,
6855                                            tcg_const_i32((TF_MASK | AC_MASK |
6856                                                           ID_MASK | NT_MASK)));
6857                     } else {
6858                         gen_helper_write_eflags(cpu_env, s->T0,
6859                                            tcg_const_i32((TF_MASK | AC_MASK |
6860                                                           ID_MASK | NT_MASK)
6861                                                          & 0xffff));
6862                     }
6863                 }
6864             }
6865             gen_pop_update(s, ot);
6866             set_cc_op(s, CC_OP_EFLAGS);
6867             /* abort translation because the TF/AC flags may change */
6868             gen_jmp_im(s, s->pc - s->cs_base);
6869             gen_eob(s);
6870         }
6871         break;
6872     case 0x9e: /* sahf */
6873         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6874             goto illegal_op;
6875         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6876         gen_compute_eflags(s);
6877         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6878         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6879         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6880         break;
6881     case 0x9f: /* lahf */
6882         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6883             goto illegal_op;
6884         gen_compute_eflags(s);
6885         /* Note: gen_compute_eflags() only gives the condition codes */
6886         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6887         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6888         break;
6889     case 0xf5: /* cmc */
6890         gen_compute_eflags(s);
6891         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6892         break;
6893     case 0xf8: /* clc */
6894         gen_compute_eflags(s);
6895         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6896         break;
6897     case 0xf9: /* stc */
6898         gen_compute_eflags(s);
6899         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6900         break;
6901     case 0xfc: /* cld */
6902         tcg_gen_movi_i32(s->tmp2_i32, 1);
6903         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6904         break;
6905     case 0xfd: /* std */
6906         tcg_gen_movi_i32(s->tmp2_i32, -1);
6907         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6908         break;
6909 
6910         /************************/
6911         /* bit operations */
6912     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6913         ot = dflag;
6914         modrm = x86_ldub_code(env, s);
6915         op = (modrm >> 3) & 7;
6916         mod = (modrm >> 6) & 3;
6917         rm = (modrm & 7) | REX_B(s);
6918         if (mod != 3) {
6919             s->rip_offset = 1;
6920             gen_lea_modrm(env, s, modrm);
6921             if (!(s->prefix & PREFIX_LOCK)) {
6922                 gen_op_ld_v(s, ot, s->T0, s->A0);
6923             }
6924         } else {
6925             gen_op_mov_v_reg(s, ot, s->T0, rm);
6926         }
6927         /* load shift */
6928         val = x86_ldub_code(env, s);
6929         tcg_gen_movi_tl(s->T1, val);
6930         if (op < 4)
6931             goto unknown_op;
6932         op -= 4;
6933         goto bt_op;
6934     case 0x1a3: /* bt Gv, Ev */
6935         op = 0;
6936         goto do_btx;
6937     case 0x1ab: /* bts */
6938         op = 1;
6939         goto do_btx;
6940     case 0x1b3: /* btr */
6941         op = 2;
6942         goto do_btx;
6943     case 0x1bb: /* btc */
6944         op = 3;
6945     do_btx:
6946         ot = dflag;
6947         modrm = x86_ldub_code(env, s);
6948         reg = ((modrm >> 3) & 7) | REX_R(s);
6949         mod = (modrm >> 6) & 3;
6950         rm = (modrm & 7) | REX_B(s);
6951         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6952         if (mod != 3) {
6953             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6954             /* specific case: the register bit offset may index outside
                    the operand, so fold it into the address displacement */
6955             gen_exts(ot, s->T1);
6956             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6957             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6958             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6959             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6960             if (!(s->prefix & PREFIX_LOCK)) {
6961                 gen_op_ld_v(s, ot, s->T0, s->A0);
6962             }
6963         } else {
6964             gen_op_mov_v_reg(s, ot, s->T0, rm);
6965         }
6966     bt_op:
6967         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6968         tcg_gen_movi_tl(s->tmp0, 1);
6969         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6970         if (s->prefix & PREFIX_LOCK) {
6971             switch (op) {
6972             case 0: /* bt */
6973                 /* Needs no atomic ops; we suppressed the normal
6974                    memory load for LOCK above, so do it now.  */
6975                 gen_op_ld_v(s, ot, s->T0, s->A0);
6976                 break;
6977             case 1: /* bts */
6978                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6979                                            s->mem_index, ot | MO_LE);
6980                 break;
6981             case 2: /* btr */
6982                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6983                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6984                                             s->mem_index, ot | MO_LE);
6985                 break;
6986             default:
6987             case 3: /* btc */
6988                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6989                                             s->mem_index, ot | MO_LE);
6990                 break;
6991             }
6992             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6993         } else {
6994             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6995             switch (op) {
6996             case 0: /* bt */
6997                 /* Data already loaded; nothing to do.  */
6998                 break;
6999             case 1: /* bts */
7000                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7001                 break;
7002             case 2: /* btr */
7003                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7004                 break;
7005             default:
7006             case 3: /* btc */
7007                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7008                 break;
7009             }
7010             if (op != 0) {
7011                 if (mod != 3) {
7012                     gen_op_st_v(s, ot, s->T0, s->A0);
7013                 } else {
7014                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7015                 }
7016             }
7017         }
7018 
7019         /* Delay all CC updates until after the store above.  Note that
7020            C is the result of the test, Z is unchanged, and the others
7021            are all undefined.  */
7022         switch (s->cc_op) {
7023         case CC_OP_MULB ... CC_OP_MULQ:
7024         case CC_OP_ADDB ... CC_OP_ADDQ:
7025         case CC_OP_ADCB ... CC_OP_ADCQ:
7026         case CC_OP_SUBB ... CC_OP_SUBQ:
7027         case CC_OP_SBBB ... CC_OP_SBBQ:
7028         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7029         case CC_OP_INCB ... CC_OP_INCQ:
7030         case CC_OP_DECB ... CC_OP_DECQ:
7031         case CC_OP_SHLB ... CC_OP_SHLQ:
7032         case CC_OP_SARB ... CC_OP_SARQ:
7033         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7034             /* Z was going to be computed from the non-zero status of CC_DST.
7035                We can get that same Z value (and the new C value) by leaving
7036                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7037                same width.  */
7038             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7039             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7040             break;
7041         default:
7042             /* Otherwise, generate EFLAGS and replace the C bit.  */
7043             gen_compute_eflags(s);
7044             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7045                                ctz32(CC_C), 1);
7046             break;
7047         }
7048         break;
7049     case 0x1bc: /* bsf / tzcnt */
7050     case 0x1bd: /* bsr / lzcnt */
7051         ot = dflag;
7052         modrm = x86_ldub_code(env, s);
7053         reg = ((modrm >> 3) & 7) | REX_R(s);
7054         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7055         gen_extu(ot, s->T0);
7056 
7057         /* Note that lzcnt and tzcnt are in different extensions.  */
7058         if ((prefixes & PREFIX_REPZ)
7059             && (b & 1
7060                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7061                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7062             int size = 8 << ot;
7063             /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
7064             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7065             if (b & 1) {
7066                 /* For lzcnt, reduce the target_ulong result by the
7067                    number of zeros that we expect to find at the top.  */
7068                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7069                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7070             } else {
7071                 /* For tzcnt, a zero input must return the operand size.  */
7072                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7073             }
7074             /* For lzcnt/tzcnt, the Z bit is defined in terms of the result.  */
7075             gen_op_update1_cc(s);
7076             set_cc_op(s, CC_OP_BMILGB + ot);
7077         } else {
7078             /* For bsr/bsf, only the Z bit is defined and it is related
7079                to the input and not the result.  */
7080             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7081             set_cc_op(s, CC_OP_LOGICB + ot);
7082 
7083             /* ??? The manual says that the output is undefined when the
7084                input is zero, but real hardware leaves it unchanged, and
7085                real programs appear to depend on that.  Accomplish this
7086                by passing the output as the value to return upon zero.  */
7087             if (b & 1) {
7088                 /* For bsr, return the bit index of the first 1 bit,
7089                    not the count of leading zeros.  */
7090                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7091                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7092                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7093             } else {
7094                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7095             }
7096         }
7097         gen_op_mov_reg_v(s, ot, reg, s->T0);
7098         break;
7099         /************************/
7100         /* bcd */
7101     case 0x27: /* daa */
7102         if (CODE64(s))
7103             goto illegal_op;
7104         gen_update_cc_op(s);
7105         gen_helper_daa(cpu_env);
7106         set_cc_op(s, CC_OP_EFLAGS);
7107         break;
7108     case 0x2f: /* das */
7109         if (CODE64(s))
7110             goto illegal_op;
7111         gen_update_cc_op(s);
7112         gen_helper_das(cpu_env);
7113         set_cc_op(s, CC_OP_EFLAGS);
7114         break;
7115     case 0x37: /* aaa */
7116         if (CODE64(s))
7117             goto illegal_op;
7118         gen_update_cc_op(s);
7119         gen_helper_aaa(cpu_env);
7120         set_cc_op(s, CC_OP_EFLAGS);
7121         break;
7122     case 0x3f: /* aas */
7123         if (CODE64(s))
7124             goto illegal_op;
7125         gen_update_cc_op(s);
7126         gen_helper_aas(cpu_env);
7127         set_cc_op(s, CC_OP_EFLAGS);
7128         break;
7129     case 0xd4: /* aam */
7130         if (CODE64(s))
7131             goto illegal_op;
7132         val = x86_ldub_code(env, s);
7133         if (val == 0) {
7134             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7135         } else {
7136             gen_helper_aam(cpu_env, tcg_const_i32(val));
7137             set_cc_op(s, CC_OP_LOGICB);
7138         }
7139         break;
7140     case 0xd5: /* aad */
7141         if (CODE64(s))
7142             goto illegal_op;
7143         val = x86_ldub_code(env, s);
7144         gen_helper_aad(cpu_env, tcg_const_i32(val));
7145         set_cc_op(s, CC_OP_LOGICB);
7146         break;
7147         /************************/
7148         /* misc */
7149     case 0x90: /* nop */
7150         /* XXX: apply the correct LOCK test to all insns */
7151         if (prefixes & PREFIX_LOCK) {
7152             goto illegal_op;
7153         }
7154         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7155         if (REX_B(s)) {
7156             goto do_xchg_reg_eax;
7157         }
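         /* rep nop (F3 90) is PAUSE: stop translation here so a guest
            spin loop does not stay inside a single TB forever.  */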
7158         if (prefixes & PREFIX_REPZ) {
7159             gen_update_cc_op(s);
7160             gen_jmp_im(s, pc_start - s->cs_base);
7161             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7162             s->base.is_jmp = DISAS_NORETURN;
7163         }
7164         break;
7165     case 0x9b: /* fwait */
7166         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7167             (HF_MP_MASK | HF_TS_MASK)) {
7168             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7169         } else {
7170             gen_helper_fwait(cpu_env);
7171         }
7172         break;
7173     case 0xcc: /* int3 */
7174         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7175         break;
7176     case 0xcd: /* int N */
7177         val = x86_ldub_code(env, s);
7178         if (check_vm86_iopl(s)) {
7179             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7180         }
7181         break;
7182     case 0xce: /* into */
7183         if (CODE64(s))
7184             goto illegal_op;
7185         gen_update_cc_op(s);
7186         gen_jmp_im(s, pc_start - s->cs_base);
7187         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7188         break;
7189 #ifdef WANT_ICEBP
7190     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7191         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7192         gen_debug(s);
7193         break;
7194 #endif
7195     case 0xfa: /* cli */
7196         if (check_iopl(s)) {
7197             gen_helper_cli(cpu_env);
7198         }
7199         break;
7200     case 0xfb: /* sti */
7201         if (check_iopl(s)) {
7202             gen_helper_sti(cpu_env);
7203             /* interrupts are not taken until the insn after sti has executed */
7204             gen_jmp_im(s, s->pc - s->cs_base);
7205             gen_eob_inhibit_irq(s, true);
7206         }
7207         break;
7208     case 0x62: /* bound */
7209         if (CODE64(s))
7210             goto illegal_op;
7211         ot = dflag;
7212         modrm = x86_ldub_code(env, s);
7213         reg = (modrm >> 3) & 7;
7214         mod = (modrm >> 6) & 3;
7215         if (mod == 3)
7216             goto illegal_op;
7217         gen_op_mov_v_reg(s, ot, s->T0, reg);
7218         gen_lea_modrm(env, s, modrm);
7219         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7220         if (ot == MO_16) {
7221             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7222         } else {
7223             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7224         }
7225         break;
7226     case 0x1c8 ... 0x1cf: /* bswap reg */
7227         reg = (b & 7) | REX_B(s);
7228 #ifdef TARGET_X86_64
7229         if (dflag == MO_64) {
7230             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7231             break;
7232         }
7233 #endif
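         /* 32-bit operand: bswap and zero-extend into the full register
            (TCG_BSWAP_OZ), as a 32-bit destination would in 64-bit
            mode.  */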
7234         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7235         break;
7236     case 0xd6: /* salc */
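         /* Undocumented: set AL to 0xff if CF is set, 0x00 otherwise.  */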
7237         if (CODE64(s))
7238             goto illegal_op;
7239         gen_compute_eflags_c(s, s->T0);
7240         tcg_gen_neg_tl(s->T0, s->T0);
7241         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7242         break;
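         /*
          * The LOOPcc/JECXZ family is compiled as a small diamond: l1 is
          * the branch-taken target, l3 re-synchronizes EIP on the
          * fall-through path, and l2 is the common exit that ends the TB.
          */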
7243     case 0xe0: /* loopnz */
7244     case 0xe1: /* loopz */
7245     case 0xe2: /* loop */
7246     case 0xe3: /* jecxz */
7247         {
7248             TCGLabel *l1, *l2, *l3;
7249 
7250             tval = (int8_t)insn_get(env, s, MO_8);
7251             next_eip = s->pc - s->cs_base;
7252             tval += next_eip;
7253             if (dflag == MO_16) {
7254                 tval &= 0xffff;
7255             }
7256 
7257             l1 = gen_new_label();
7258             l2 = gen_new_label();
7259             l3 = gen_new_label();
7260             gen_update_cc_op(s);
7261             b &= 3;
7262             switch(b) {
7263             case 0: /* loopnz */
7264             case 1: /* loopz */
7265                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7266                 gen_op_jz_ecx(s, s->aflag, l3);
7267                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7268                 break;
7269             case 2: /* loop */
7270                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7271                 gen_op_jnz_ecx(s, s->aflag, l1);
7272                 break;
7273             default:
7274             case 3: /* jcxz */
7275                 gen_op_jz_ecx(s, s->aflag, l1);
7276                 break;
7277             }
7278 
7279             gen_set_label(l3);
7280             gen_jmp_im(s, next_eip);
7281             tcg_gen_br(l2);
7282 
7283             gen_set_label(l1);
7284             gen_jmp_im(s, tval);
7285             gen_set_label(l2);
7286             gen_eob(s);
7287         }
7288         break;
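         /*
          * 0x130 is WRMSR and 0x132 is RDMSR; bit 1 of the opcode (b & 2)
          * selects between them.  WRMSR additionally ends the TB, since an
          * MSR write (e.g. to EFER) can change translation-relevant state.
          */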
7289     case 0x130: /* wrmsr */
7290     case 0x132: /* rdmsr */
7291         if (check_cpl0(s)) {
7292             gen_update_cc_op(s);
7293             gen_jmp_im(s, pc_start - s->cs_base);
7294             if (b & 2) {
7295                 gen_helper_rdmsr(cpu_env);
7296             } else {
7297                 gen_helper_wrmsr(cpu_env);
7298                 gen_jmp_im(s, s->pc - s->cs_base);
7299                 gen_eob(s);
7300             }
7301         }
7302         break;
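         /*
          * For RDTSC under icount, gen_io_start() marks the insn as an I/O
          * operation and the TB is ended right after it via gen_jmp(), so
          * the instruction counter is exact when the helper samples it.
          */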
7303     case 0x131: /* rdtsc */
7304         gen_update_cc_op(s);
7305         gen_jmp_im(s, pc_start - s->cs_base);
7306         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7307             gen_io_start();
7308         }
7309         gen_helper_rdtsc(cpu_env);
7310         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7311             gen_jmp(s, s->pc - s->cs_base);
7312         }
7313         break;
7314     case 0x133: /* rdpmc */
7315         gen_update_cc_op(s);
7316         gen_jmp_im(s, pc_start - s->cs_base);
7317         gen_helper_rdpmc(cpu_env);
7318         s->base.is_jmp = DISAS_NORETURN;
7319         break;
7320     case 0x134: /* sysenter */
7321         /* On Intel CPUs, SYSENTER is valid in 64-bit mode */
7322         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7323             goto illegal_op;
7324         if (!PE(s)) {
7325             gen_exception_gpf(s);
7326         } else {
7327             gen_helper_sysenter(cpu_env);
7328             gen_eob(s);
7329         }
7330         break;
7331     case 0x135: /* sysexit */
7332         /* On Intel CPUs, SYSEXIT is valid in 64-bit mode */
7333         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7334             goto illegal_op;
7335         if (!PE(s)) {
7336             gen_exception_gpf(s);
7337         } else {
7338             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7339             gen_eob(s);
7340         }
7341         break;
7342 #ifdef TARGET_X86_64
7343     case 0x105: /* syscall */
7344         /* XXX: is it usable in real mode? */
7345         gen_update_cc_op(s);
7346         gen_jmp_im(s, pc_start - s->cs_base);
7347         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7348         /* TF handling for the syscall insn is different.  The TF bit is
7349            checked after the syscall insn completes.  This keeps #DB from being
7350            generated after one has entered CPL0 if TF is set in FMASK.  */
7351         gen_eob_worker(s, false, true);
7352         break;
7353     case 0x107: /* sysret */
7354         if (!PE(s)) {
7355             gen_exception_gpf(s);
7356         } else {
7357             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7358             /* condition codes are modified only in long mode */
7359             if (LMA(s)) {
7360                 set_cc_op(s, CC_OP_EFLAGS);
7361             }
7362             /* TF handling for the sysret insn is different.  The TF bit is
7363                checked after the sysret insn completes.  This allows #DB to be
7364                generated "as if" the syscall insn in userspace had just
7365                completed.  */
7366             gen_eob_worker(s, false, true);
7367         }
7368         break;
7369 #endif
7370     case 0x1a2: /* cpuid */
7371         gen_update_cc_op(s);
7372         gen_jmp_im(s, pc_start - s->cs_base);
7373         gen_helper_cpuid(cpu_env);
7374         break;
7375     case 0xf4: /* hlt */
7376         if (check_cpl0(s)) {
7377             gen_update_cc_op(s);
7378             gen_jmp_im(s, pc_start - s->cs_base);
7379             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7380             s->base.is_jmp = DISAS_NORETURN;
7381         }
7382         break;
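         /*
          * 0F 00 is "group 6": the modrm reg field selects among
          * SLDT/STR/LLDT/LTR/VERR/VERW, all protected-mode only.
          */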
7383     case 0x100:
7384         modrm = x86_ldub_code(env, s);
7385         mod = (modrm >> 6) & 3;
7386         op = (modrm >> 3) & 7;
7387         switch(op) {
7388         case 0: /* sldt */
7389             if (!PE(s) || VM86(s))
7390                 goto illegal_op;
7391             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7392             tcg_gen_ld32u_tl(s->T0, cpu_env,
7393                              offsetof(CPUX86State, ldt.selector));
7394             ot = mod == 3 ? dflag : MO_16;
7395             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7396             break;
7397         case 2: /* lldt */
7398             if (!PE(s) || VM86(s))
7399                 goto illegal_op;
7400             if (check_cpl0(s)) {
7401                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7402                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7403                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7404                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7405             }
7406             break;
7407         case 1: /* str */
7408             if (!PE(s) || VM86(s))
7409                 goto illegal_op;
7410             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7411             tcg_gen_ld32u_tl(s->T0, cpu_env,
7412                              offsetof(CPUX86State, tr.selector));
7413             ot = mod == 3 ? dflag : MO_16;
7414             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7415             break;
7416         case 3: /* ltr */
7417             if (!PE(s) || VM86(s))
7418                 goto illegal_op;
7419             if (check_cpl0(s)) {
7420                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7421                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7422                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7423                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7424             }
7425             break;
7426         case 4: /* verr */
7427         case 5: /* verw */
7428             if (!PE(s) || VM86(s))
7429                 goto illegal_op;
7430             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7431             gen_update_cc_op(s);
7432             if (op == 4) {
7433                 gen_helper_verr(cpu_env, s->T0);
7434             } else {
7435                 gen_helper_verw(cpu_env, s->T0);
7436             }
7437             set_cc_op(s, CC_OP_EFLAGS);
7438             break;
7439         default:
7440             goto unknown_op;
7441         }
7442         break;
7443 
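         /*
          * 0F 01 is "group 7".  The memory forms are SGDT/SIDT/LGDT/LIDT,
          * SMSW/LMSW and INVLPG; the register forms (mod == 3) encode
          * extensions such as MONITOR/MWAIT, CLAC/STAC, the SVM
          * instructions, XGETBV/XSETBV, SWAPGS and RDTSCP, hence the
          * switch on the whole modrm byte.
          */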
7444     case 0x101:
7445         modrm = x86_ldub_code(env, s);
7446         switch (modrm) {
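             /*
              * SGDT/SIDT store a 16-bit limit followed by the base; with a
              * 16-bit operand size only 24 bits of the base are stored,
              * hence the 0xffffff mask below.
              */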
7447         CASE_MODRM_MEM_OP(0): /* sgdt */
7448             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7449             gen_lea_modrm(env, s, modrm);
7450             tcg_gen_ld32u_tl(s->T0,
7451                              cpu_env, offsetof(CPUX86State, gdt.limit));
7452             gen_op_st_v(s, MO_16, s->T0, s->A0);
7453             gen_add_A0_im(s, 2);
7454             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7455             if (dflag == MO_16) {
7456                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7457             }
7458             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7459             break;
7460 
7461         case 0xc8: /* monitor */
7462             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7463                 goto illegal_op;
7464             }
7465             gen_update_cc_op(s);
7466             gen_jmp_im(s, pc_start - s->cs_base);
7467             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7468             gen_extu(s->aflag, s->A0);
7469             gen_add_A0_ds_seg(s);
7470             gen_helper_monitor(cpu_env, s->A0);
7471             break;
7472 
7473         case 0xc9: /* mwait */
7474             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7475                 goto illegal_op;
7476             }
7477             gen_update_cc_op(s);
7478             gen_jmp_im(s, pc_start - s->cs_base);
7479             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7480             s->base.is_jmp = DISAS_NORETURN;
7481             break;
7482 
7483         case 0xca: /* clac */
7484             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7485                 || CPL(s) != 0) {
7486                 goto illegal_op;
7487             }
7488             gen_helper_clac(cpu_env);
7489             gen_jmp_im(s, s->pc - s->cs_base);
7490             gen_eob(s);
7491             break;
7492 
7493         case 0xcb: /* stac */
7494             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7495                 || CPL(s) != 0) {
7496                 goto illegal_op;
7497             }
7498             gen_helper_stac(cpu_env);
7499             gen_jmp_im(s, s->pc - s->cs_base);
7500             gen_eob(s);
7501             break;
7502 
7503         CASE_MODRM_MEM_OP(1): /* sidt */
7504             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7505             gen_lea_modrm(env, s, modrm);
7506             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7507             gen_op_st_v(s, MO_16, s->T0, s->A0);
7508             gen_add_A0_im(s, 2);
7509             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7510             if (dflag == MO_16) {
7511                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7512             }
7513             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7514             break;
7515 
7516         case 0xd0: /* xgetbv */
7517             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7518                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7519                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7520                 goto illegal_op;
7521             }
7522             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7523             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7524             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7525             break;
7526 
7527         case 0xd1: /* xsetbv */
7528             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7529                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7530                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7531                 goto illegal_op;
7532             }
7533             if (!check_cpl0(s)) {
7534                 break;
7535             }
7536             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7537                                   cpu_regs[R_EDX]);
7538             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7539             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7540             /* End TB because translation flags may change.  */
7541             gen_jmp_im(s, s->pc - s->cs_base);
7542             gen_eob(s);
7543             break;
7544 
7545         case 0xd8: /* VMRUN */
7546             if (!SVME(s) || !PE(s)) {
7547                 goto illegal_op;
7548             }
7549             if (!check_cpl0(s)) {
7550                 break;
7551             }
7552             gen_update_cc_op(s);
7553             gen_jmp_im(s, pc_start - s->cs_base);
7554             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7555                              tcg_const_i32(s->pc - pc_start));
7556             tcg_gen_exit_tb(NULL, 0);
7557             s->base.is_jmp = DISAS_NORETURN;
7558             break;
7559 
7560         case 0xd9: /* VMMCALL */
7561             if (!SVME(s)) {
7562                 goto illegal_op;
7563             }
7564             gen_update_cc_op(s);
7565             gen_jmp_im(s, pc_start - s->cs_base);
7566             gen_helper_vmmcall(cpu_env);
7567             break;
7568 
7569         case 0xda: /* VMLOAD */
7570             if (!SVME(s) || !PE(s)) {
7571                 goto illegal_op;
7572             }
7573             if (!check_cpl0(s)) {
7574                 break;
7575             }
7576             gen_update_cc_op(s);
7577             gen_jmp_im(s, pc_start - s->cs_base);
7578             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7579             break;
7580 
7581         case 0xdb: /* VMSAVE */
7582             if (!SVME(s) || !PE(s)) {
7583                 goto illegal_op;
7584             }
7585             if (!check_cpl0(s)) {
7586                 break;
7587             }
7588             gen_update_cc_op(s);
7589             gen_jmp_im(s, pc_start - s->cs_base);
7590             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7591             break;
7592 
7593         case 0xdc: /* STGI */
7594             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7595                 || !PE(s)) {
7596                 goto illegal_op;
7597             }
7598             if (!check_cpl0(s)) {
7599                 break;
7600             }
7601             gen_update_cc_op(s);
7602             gen_helper_stgi(cpu_env);
7603             gen_jmp_im(s, s->pc - s->cs_base);
7604             gen_eob(s);
7605             break;
7606 
7607         case 0xdd: /* CLGI */
7608             if (!SVME(s) || !PE(s)) {
7609                 goto illegal_op;
7610             }
7611             if (!check_cpl0(s)) {
7612                 break;
7613             }
7614             gen_update_cc_op(s);
7615             gen_jmp_im(s, pc_start - s->cs_base);
7616             gen_helper_clgi(cpu_env);
7617             break;
7618 
7619         case 0xde: /* SKINIT */
7620             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7621                 || !PE(s)) {
7622                 goto illegal_op;
7623             }
7624             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7625             /* If not intercepted, not implemented -- raise #UD. */
7626             goto illegal_op;
7627 
7628         case 0xdf: /* INVLPGA */
7629             if (!SVME(s) || !PE(s)) {
7630                 goto illegal_op;
7631             }
7632             if (!check_cpl0(s)) {
7633                 break;
7634             }
7635             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7636             if (s->aflag == MO_64) {
7637                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7638             } else {
7639                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7640             }
7641             gen_helper_flush_page(cpu_env, s->A0);
7642             gen_jmp_im(s, s->pc - s->cs_base);
7643             gen_eob(s);
7644             break;
7645 
7646         CASE_MODRM_MEM_OP(2): /* lgdt */
7647             if (!check_cpl0(s)) {
7648                 break;
7649             }
7650             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7651             gen_lea_modrm(env, s, modrm);
7652             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7653             gen_add_A0_im(s, 2);
7654             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7655             if (dflag == MO_16) {
7656                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7657             }
7658             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7659             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7660             break;
7661 
7662         CASE_MODRM_MEM_OP(3): /* lidt */
7663             if (!check_cpl0(s)) {
7664                 break;
7665             }
7666             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7667             gen_lea_modrm(env, s, modrm);
7668             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7669             gen_add_A0_im(s, 2);
7670             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7671             if (dflag == MO_16) {
7672                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7673             }
7674             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7675             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7676             break;
7677 
7678         CASE_MODRM_OP(4): /* smsw */
7679             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7680             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7681             /*
7682              * In 32-bit mode, the upper 16 bits of the destination
7683              * register are undefined.  In practice CR0[31:0] is stored
7684              * just like in 64-bit mode.
7685              */
7686             mod = (modrm >> 6) & 3;
7687             ot = (mod != 3 ? MO_16 : s->dflag);
7688             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7689             break;
7690         case 0xee: /* rdpkru */
7691             if (prefixes & PREFIX_LOCK) {
7692                 goto illegal_op;
7693             }
7694             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7695             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7696             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7697             break;
7698         case 0xef: /* wrpkru */
7699             if (prefixes & PREFIX_LOCK) {
7700                 goto illegal_op;
7701             }
7702             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7703                                   cpu_regs[R_EDX]);
7704             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7705             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7706             break;
7707 
7708         CASE_MODRM_OP(6): /* lmsw */
7709             if (!check_cpl0(s)) {
7710                 break;
7711             }
7712             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7713             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7714             /*
7715              * Only the 4 lower bits of CR0 are modified.
7716              * PE cannot be set to zero if already set to one.
7717              */
7718             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7719             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7720             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7721             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7722             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7723             gen_jmp_im(s, s->pc - s->cs_base);
7724             gen_eob(s);
7725             break;
7726 
7727         CASE_MODRM_MEM_OP(7): /* invlpg */
7728             if (!check_cpl0(s)) {
7729                 break;
7730             }
7731             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7732             gen_lea_modrm(env, s, modrm);
7733             gen_helper_flush_page(cpu_env, s->A0);
7734             gen_jmp_im(s, s->pc - s->cs_base);
7735             gen_eob(s);
7736             break;
7737 
7738         case 0xf8: /* swapgs */
7739 #ifdef TARGET_X86_64
7740             if (CODE64(s)) {
7741                 if (check_cpl0(s)) {
7742                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7743                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7744                                   offsetof(CPUX86State, kernelgsbase));
7745                     tcg_gen_st_tl(s->T0, cpu_env,
7746                                   offsetof(CPUX86State, kernelgsbase));
7747                 }
7748                 break;
7749             }
7750 #endif
7751             goto illegal_op;
7752 
7753         case 0xf9: /* rdtscp */
7754             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7755                 goto illegal_op;
7756             }
7757             gen_update_cc_op(s);
7758             gen_jmp_im(s, pc_start - s->cs_base);
7759             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7760                 gen_io_start();
7761             }
7762             gen_helper_rdtscp(cpu_env);
7763             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7764                 gen_jmp(s, s->pc - s->cs_base);
7765             }
7766             break;
7767 
7768         default:
7769             goto unknown_op;
7770         }
7771         break;
7772 
7773     case 0x108: /* invd */
7774     case 0x109: /* wbinvd */
7775         if (check_cpl0(s)) {
7776             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7777             /* nothing to do */
7778         }
7779         break;
7780     case 0x63: /* arpl or movslS (x86_64) */
7781 #ifdef TARGET_X86_64
7782         if (CODE64(s)) {
7783             int d_ot;
7784             /* d_ot is the size of the destination */
7785             d_ot = dflag;
7786 
7787             modrm = x86_ldub_code(env, s);
7788             reg = ((modrm >> 3) & 7) | REX_R(s);
7789             mod = (modrm >> 6) & 3;
7790             rm = (modrm & 7) | REX_B(s);
7791 
7792             if (mod == 3) {
7793                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7794                 /* sign extend */
7795                 if (d_ot == MO_64) {
7796                     tcg_gen_ext32s_tl(s->T0, s->T0);
7797                 }
7798                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7799             } else {
7800                 gen_lea_modrm(env, s, modrm);
7801                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7802                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7803             }
7804         } else
7805 #endif
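             /*
              * ARPL semantics, roughly:
              *     if (dst.rpl < src.rpl) { ZF = 1; dst.rpl = src.rpl; }
              *     else                   { ZF = 0; }
              * t2 below holds the new ZF value, which is merged into
              * cc_src at the end.
              */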
7806         {
7807             TCGLabel *label1;
7808             TCGv t0, t1, t2, a0;
7809 
7810             if (!PE(s) || VM86(s))
7811                 goto illegal_op;
7812             t0 = tcg_temp_local_new();
7813             t1 = tcg_temp_local_new();
7814             t2 = tcg_temp_local_new();
7815             ot = MO_16;
7816             modrm = x86_ldub_code(env, s);
7817             reg = (modrm >> 3) & 7;
7818             mod = (modrm >> 6) & 3;
7819             rm = modrm & 7;
7820             if (mod != 3) {
7821                 gen_lea_modrm(env, s, modrm);
7822                 gen_op_ld_v(s, ot, t0, s->A0);
7823                 a0 = tcg_temp_local_new();
7824                 tcg_gen_mov_tl(a0, s->A0);
7825             } else {
7826                 gen_op_mov_v_reg(s, ot, t0, rm);
7827                 a0 = NULL;
7828             }
7829             gen_op_mov_v_reg(s, ot, t1, reg);
7830             tcg_gen_andi_tl(s->tmp0, t0, 3);
7831             tcg_gen_andi_tl(t1, t1, 3);
7832             tcg_gen_movi_tl(t2, 0);
7833             label1 = gen_new_label();
7834             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7835             tcg_gen_andi_tl(t0, t0, ~3);
7836             tcg_gen_or_tl(t0, t0, t1);
7837             tcg_gen_movi_tl(t2, CC_Z);
7838             gen_set_label(label1);
7839             if (mod != 3) {
7840                 gen_op_st_v(s, ot, t0, a0);
7841                 tcg_temp_free(a0);
7842             } else {
7843                 gen_op_mov_reg_v(s, ot, rm, t0);
7844             }
7845             gen_compute_eflags(s);
7846             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7847             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7848             tcg_temp_free(t0);
7849             tcg_temp_free(t1);
7850             tcg_temp_free(t2);
7851         }
7852         break;
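         /*
          * LAR/LSL only write the destination when the helper sets ZF;
          * the CC_Z test below skips the writeback on failure while still
          * committing the flags via CC_OP_EFLAGS.
          */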
7853     case 0x102: /* lar */
7854     case 0x103: /* lsl */
7855         {
7856             TCGLabel *label1;
7857             TCGv t0;
7858             if (!PE(s) || VM86(s))
7859                 goto illegal_op;
7860             ot = dflag != MO_16 ? MO_32 : MO_16;
7861             modrm = x86_ldub_code(env, s);
7862             reg = ((modrm >> 3) & 7) | REX_R(s);
7863             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7864             t0 = tcg_temp_local_new();
7865             gen_update_cc_op(s);
7866             if (b == 0x102) {
7867                 gen_helper_lar(t0, cpu_env, s->T0);
7868             } else {
7869                 gen_helper_lsl(t0, cpu_env, s->T0);
7870             }
7871             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7872             label1 = gen_new_label();
7873             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7874             gen_op_mov_reg_v(s, ot, reg, t0);
7875             gen_set_label(label1);
7876             set_cc_op(s, CC_OP_EFLAGS);
7877             tcg_temp_free(t0);
7878         }
7879         break;
7880     case 0x118:
7881         modrm = x86_ldub_code(env, s);
7882         mod = (modrm >> 6) & 3;
7883         op = (modrm >> 3) & 7;
7884         switch(op) {
7885         case 0: /* prefetchnta */
7886         case 1: /* prefetcht0 */
7887         case 2: /* prefetcht1 */
7888         case 3: /* prefetcht2 */
7889             if (mod == 3)
7890                 goto illegal_op;
7891             gen_nop_modrm(env, s, modrm);
7892             /* nothing more to do */
7893             break;
7894         default: /* nop (multi byte) */
7895             gen_nop_modrm(env, s, modrm);
7896             break;
7897         }
7898         break;
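         /*
          * 0F 1A and 0F 1B form the MPX group (BNDCL/BNDCU/BNDCN, BNDMOV,
          * BNDLDX/BNDSTX, BNDMK), distinguished by the F3/F2/66 prefixes.
          * When MPX is not enabled they decay to multi-byte NOPs via
          * gen_nop_modrm().
          */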
7899     case 0x11a:
7900         modrm = x86_ldub_code(env, s);
7901         if (s->flags & HF_MPX_EN_MASK) {
7902             mod = (modrm >> 6) & 3;
7903             reg = ((modrm >> 3) & 7) | REX_R(s);
7904             if (prefixes & PREFIX_REPZ) {
7905                 /* bndcl */
7906                 if (reg >= 4
7907                     || (prefixes & PREFIX_LOCK)
7908                     || s->aflag == MO_16) {
7909                     goto illegal_op;
7910                 }
7911                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7912             } else if (prefixes & PREFIX_REPNZ) {
7913                 /* bndcu */
7914                 if (reg >= 4
7915                     || (prefixes & PREFIX_LOCK)
7916                     || s->aflag == MO_16) {
7917                     goto illegal_op;
7918                 }
7919                 TCGv_i64 notu = tcg_temp_new_i64();
7920                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7921                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7922                 tcg_temp_free_i64(notu);
7923             } else if (prefixes & PREFIX_DATA) {
7924                 /* bndmov -- from reg/mem */
7925                 if (reg >= 4 || s->aflag == MO_16) {
7926                     goto illegal_op;
7927                 }
7928                 if (mod == 3) {
7929                     int reg2 = (modrm & 7) | REX_B(s);
7930                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7931                         goto illegal_op;
7932                     }
7933                     if (s->flags & HF_MPX_IU_MASK) {
7934                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7935                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7936                     }
7937                 } else {
7938                     gen_lea_modrm(env, s, modrm);
7939                     if (CODE64(s)) {
7940                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7941                                             s->mem_index, MO_LEQ);
7942                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7943                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7944                                             s->mem_index, MO_LEQ);
7945                     } else {
7946                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7947                                             s->mem_index, MO_LEUL);
7948                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7949                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7950                                             s->mem_index, MO_LEUL);
7951                     }
7952                     /* bnd registers are now in use */
7953                     gen_set_hflag(s, HF_MPX_IU_MASK);
7954                 }
7955             } else if (mod != 3) {
7956                 /* bndldx */
7957                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7958                 if (reg >= 4
7959                     || (prefixes & PREFIX_LOCK)
7960                     || s->aflag == MO_16
7961                     || a.base < -1) {
7962                     goto illegal_op;
7963                 }
7964                 if (a.base >= 0) {
7965                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7966                 } else {
7967                     tcg_gen_movi_tl(s->A0, 0);
7968                 }
7969                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7970                 if (a.index >= 0) {
7971                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7972                 } else {
7973                     tcg_gen_movi_tl(s->T0, 0);
7974                 }
7975                 if (CODE64(s)) {
7976                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7977                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7978                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7979                 } else {
7980                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7981                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7982                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7983                 }
7984                 gen_set_hflag(s, HF_MPX_IU_MASK);
7985             }
7986         }
7987         gen_nop_modrm(env, s, modrm);
7988         break;
7989     case 0x11b:
7990         modrm = x86_ldub_code(env, s);
7991         if (s->flags & HF_MPX_EN_MASK) {
7992             mod = (modrm >> 6) & 3;
7993             reg = ((modrm >> 3) & 7) | REX_R(s);
7994             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7995                 /* bndmk */
7996                 if (reg >= 4
7997                     || (prefixes & PREFIX_LOCK)
7998                     || s->aflag == MO_16) {
7999                     goto illegal_op;
8000                 }
8001                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8002                 if (a.base >= 0) {
8003                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8004                     if (!CODE64(s)) {
8005                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8006                     }
8007                 } else if (a.base == -1) {
8008                     /* no base register: the lower bound is 0 */
8009                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8010                 } else {
8011                     /* rip-relative generates #ud */
8012                     goto illegal_op;
8013                 }
8014                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8015                 if (!CODE64(s)) {
8016                     tcg_gen_ext32u_tl(s->A0, s->A0);
8017                 }
8018                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8019                 /* bnd registers are now in use */
8020                 gen_set_hflag(s, HF_MPX_IU_MASK);
8021                 break;
8022             } else if (prefixes & PREFIX_REPNZ) {
8023                 /* bndcn */
8024                 if (reg >= 4
8025                     || (prefixes & PREFIX_LOCK)
8026                     || s->aflag == MO_16) {
8027                     goto illegal_op;
8028                 }
8029                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8030             } else if (prefixes & PREFIX_DATA) {
8031                 /* bndmov -- to reg/mem */
8032                 if (reg >= 4 || s->aflag == MO_16) {
8033                     goto illegal_op;
8034                 }
8035                 if (mod == 3) {
8036                     int reg2 = (modrm & 7) | REX_B(s);
8037                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8038                         goto illegal_op;
8039                     }
8040                     if (s->flags & HF_MPX_IU_MASK) {
8041                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8042                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8043                     }
8044                 } else {
8045                     gen_lea_modrm(env, s, modrm);
8046                     if (CODE64(s)) {
8047                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8048                                             s->mem_index, MO_LEQ);
8049                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8050                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8051                                             s->mem_index, MO_LEQ);
8052                     } else {
8053                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8054                                             s->mem_index, MO_LEUL);
8055                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8056                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8057                                             s->mem_index, MO_LEUL);
8058                     }
8059                 }
8060             } else if (mod != 3) {
8061                 /* bndstx */
8062                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8063                 if (reg >= 4
8064                     || (prefixes & PREFIX_LOCK)
8065                     || s->aflag == MO_16
8066                     || a.base < -1) {
8067                     goto illegal_op;
8068                 }
8069                 if (a.base >= 0) {
8070                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8071                 } else {
8072                     tcg_gen_movi_tl(s->A0, 0);
8073                 }
8074                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8075                 if (a.index >= 0) {
8076                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8077                 } else {
8078                     tcg_gen_movi_tl(s->T0, 0);
8079                 }
8080                 if (CODE64(s)) {
8081                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8082                                         cpu_bndl[reg], cpu_bndu[reg]);
8083                 } else {
8084                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8085                                         cpu_bndl[reg], cpu_bndu[reg]);
8086                 }
8087             }
8088         }
8089         gen_nop_modrm(env, s, modrm);
8090         break;
8091     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8092         modrm = x86_ldub_code(env, s);
8093         gen_nop_modrm(env, s, modrm);
8094         break;
8095 
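         /*
          * MOV to/from control registers.  Bit 1 of the opcode (b & 2)
          * gives the direction; a LOCK prefix turns a CR0 access into CR8
          * on CPUs with the CR8 legacy alias.
          */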
8096     case 0x120: /* mov reg, crN */
8097     case 0x122: /* mov crN, reg */
8098         if (!check_cpl0(s)) {
8099             break;
8100         }
8101         modrm = x86_ldub_code(env, s);
8102         /*
8103          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8104          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8105          * processors all show that the mod bits are assumed to be 1's,
8106          * regardless of actual values.
8107          */
8108         rm = (modrm & 7) | REX_B(s);
8109         reg = ((modrm >> 3) & 7) | REX_R(s);
8110         switch (reg) {
8111         case 0:
8112             if ((prefixes & PREFIX_LOCK) &&
8113                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8114                 reg = 8;
8115             }
8116             break;
8117         case 2:
8118         case 3:
8119         case 4:
8120         case 8:
8121             break;
8122         default:
8123             goto unknown_op;
8124         }
8125         ot  = (CODE64(s) ? MO_64 : MO_32);
8126 
8127         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8128             gen_io_start();
8129         }
8130         if (b & 2) {
8131             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8132             gen_op_mov_v_reg(s, ot, s->T0, rm);
8133             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8134             gen_jmp_im(s, s->pc - s->cs_base);
8135             gen_eob(s);
8136         } else {
8137             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8138             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8139             gen_op_mov_reg_v(s, ot, rm, s->T0);
8140             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8141                 gen_jmp(s, s->pc - s->cs_base);
8142             }
8143         }
8144         break;
8145 
8146     case 0x121: /* mov reg, drN */
8147     case 0x123: /* mov drN, reg */
8148         if (check_cpl0(s)) {
8149             modrm = x86_ldub_code(env, s);
8150             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8151              * AMD documentation (24594.pdf) and testing of
8152              * Intel 386 and 486 processors all show that the mod bits
8153              * are assumed to be 1's, regardless of actual values.
8154              */
8155             rm = (modrm & 7) | REX_B(s);
8156             reg = ((modrm >> 3) & 7) | REX_R(s);
8157             if (CODE64(s))
8158                 ot = MO_64;
8159             else
8160                 ot = MO_32;
8161             if (reg >= 8) {
8162                 goto illegal_op;
8163             }
8164             if (b & 2) {
8165                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8166                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8167                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8168                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8169                 gen_jmp_im(s, s->pc - s->cs_base);
8170                 gen_eob(s);
8171             } else {
8172                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8173                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8174                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8175                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8176             }
8177         }
8178         break;
8179     case 0x106: /* clts */
8180         if (check_cpl0(s)) {
8181             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8182             gen_helper_clts(cpu_env);
8183             /* abort the block because static CPU state has changed */
8184             gen_jmp_im(s, s->pc - s->cs_base);
8185             gen_eob(s);
8186         }
8187         break;
8188     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8189     case 0x1c3: /* MOVNTI reg, mem */
8190         if (!(s->cpuid_features & CPUID_SSE2))
8191             goto illegal_op;
8192         ot = mo_64_32(dflag);
8193         modrm = x86_ldub_code(env, s);
8194         mod = (modrm >> 6) & 3;
8195         if (mod == 3)
8196             goto illegal_op;
8197         reg = ((modrm >> 3) & 7) | REX_R(s);
8198         /* generate a generic store */
8199         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8200         break;
8201     case 0x1ae:
8202         modrm = x86_ldub_code(env, s);
8203         switch (modrm) {
8204         CASE_MODRM_MEM_OP(0): /* fxsave */
8205             if (!(s->cpuid_features & CPUID_FXSR)
8206                 || (prefixes & PREFIX_LOCK)) {
8207                 goto illegal_op;
8208             }
8209             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8210                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8211                 break;
8212             }
8213             gen_lea_modrm(env, s, modrm);
8214             gen_helper_fxsave(cpu_env, s->A0);
8215             break;
8216 
8217         CASE_MODRM_MEM_OP(1): /* fxrstor */
8218             if (!(s->cpuid_features & CPUID_FXSR)
8219                 || (prefixes & PREFIX_LOCK)) {
8220                 goto illegal_op;
8221             }
8222             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8223                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8224                 break;
8225             }
8226             gen_lea_modrm(env, s, modrm);
8227             gen_helper_fxrstor(cpu_env, s->A0);
8228             break;
8229 
8230         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8231             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8232                 goto illegal_op;
8233             }
8234             if (s->flags & HF_TS_MASK) {
8235                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8236                 break;
8237             }
8238             gen_lea_modrm(env, s, modrm);
8239             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8240             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8241             break;
8242 
8243         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8244             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8245                 goto illegal_op;
8246             }
8247             if (s->flags & HF_TS_MASK) {
8248                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8249                 break;
8250             }
8251             gen_helper_update_mxcsr(cpu_env);
8252             gen_lea_modrm(env, s, modrm);
8253             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8254             gen_op_st_v(s, MO_32, s->T0, s->A0);
8255             break;
8256 
8257         CASE_MODRM_MEM_OP(4): /* xsave */
8258             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8259                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8260                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8261                 goto illegal_op;
8262             }
8263             gen_lea_modrm(env, s, modrm);
8264             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8265                                   cpu_regs[R_EDX]);
8266             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8267             break;
8268 
8269         CASE_MODRM_MEM_OP(5): /* xrstor */
8270             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8271                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8272                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8273                 goto illegal_op;
8274             }
8275             gen_lea_modrm(env, s, modrm);
8276             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8277                                   cpu_regs[R_EDX]);
8278             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8279             /* XRSTOR is how MPX is enabled, which changes how
8280                we translate.  Thus we need to end the TB.  */
8281             gen_update_cc_op(s);
8282             gen_jmp_im(s, s->pc - s->cs_base);
8283             gen_eob(s);
8284             break;
8285 
8286         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8287             if (prefixes & PREFIX_LOCK) {
8288                 goto illegal_op;
8289             }
8290             if (prefixes & PREFIX_DATA) {
8291                 /* clwb */
8292                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8293                     goto illegal_op;
8294                 }
8295                 gen_nop_modrm(env, s, modrm);
8296             } else {
8297                 /* xsaveopt */
8298                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8299                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8300                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8301                     goto illegal_op;
8302                 }
8303                 gen_lea_modrm(env, s, modrm);
8304                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8305                                       cpu_regs[R_EDX]);
8306                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8307             }
8308             break;
8309 
8310         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8311             if (prefixes & PREFIX_LOCK) {
8312                 goto illegal_op;
8313             }
8314             if (prefixes & PREFIX_DATA) {
8315                 /* clflushopt */
8316                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8317                     goto illegal_op;
8318                 }
8319             } else {
8320                 /* clflush */
8321                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8322                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8323                     goto illegal_op;
8324                 }
8325             }
8326             gen_nop_modrm(env, s, modrm);
8327             break;
8328 
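             /*
              * RD/WRFSBASE and RD/WRGSBASE share one arm: modrm bit 3
              * selects FS vs GS and bit 4 selects read vs write.  CR4's
              * FSGSBASE bit is tested at run time so that it need not be
              * tracked in the TB flags.
              */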
8329         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8330         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8331         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8332         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8333             if (CODE64(s)
8334                 && (prefixes & PREFIX_REPZ)
8335                 && !(prefixes & PREFIX_LOCK)
8336                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8337                 TCGv base, treg, src, dst;
8338 
8339                 /* Preserve hflags bits by testing CR4 at runtime.  */
8340                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8341                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8342 
8343                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8344                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8345 
8346                 if (modrm & 0x10) {
8347                     /* wr*base */
8348                     dst = base, src = treg;
8349                 } else {
8350                     /* rd*base */
8351                     dst = treg, src = base;
8352                 }
8353 
8354                 if (s->dflag == MO_32) {
8355                     tcg_gen_ext32u_tl(dst, src);
8356                 } else {
8357                     tcg_gen_mov_tl(dst, src);
8358                 }
8359                 break;
8360             }
8361             goto unknown_op;
8362 
8363         case 0xf8: /* sfence / pcommit */
8364             if (prefixes & PREFIX_DATA) {
8365                 /* pcommit */
8366                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8367                     || (prefixes & PREFIX_LOCK)) {
8368                     goto illegal_op;
8369                 }
8370                 break;
8371             }
8372             /* fallthru */
8373         case 0xf9 ... 0xff: /* sfence */
8374             if (!(s->cpuid_features & CPUID_SSE)
8375                 || (prefixes & PREFIX_LOCK)) {
8376                 goto illegal_op;
8377             }
8378             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8379             break;
8380         case 0xe8 ... 0xef: /* lfence */
8381             if (!(s->cpuid_features & CPUID_SSE)
8382                 || (prefixes & PREFIX_LOCK)) {
8383                 goto illegal_op;
8384             }
8385             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8386             break;
8387         case 0xf0 ... 0xf7: /* mfence */
8388             if (!(s->cpuid_features & CPUID_SSE2)
8389                 || (prefixes & PREFIX_LOCK)) {
8390                 goto illegal_op;
8391             }
8392             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8393             break;
8394 
8395         default:
8396             goto unknown_op;
8397         }
8398         break;
8399 
8400     case 0x10d: /* 3DNow! prefetch(w) */
8401         modrm = x86_ldub_code(env, s);
8402         mod = (modrm >> 6) & 3;
8403         if (mod == 3)
8404             goto illegal_op;
8405         gen_nop_modrm(env, s, modrm);
8406         break;
8407     case 0x1aa: /* rsm */
8408         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8409         if (!(s->flags & HF_SMM_MASK))
8410             goto illegal_op;
8411 #ifdef CONFIG_USER_ONLY
8412         /* we should not be in SMM */
8413         g_assert_not_reached();
8414 #else
8415         gen_update_cc_op(s);
8416         gen_jmp_im(s, s->pc - s->cs_base);
8417         gen_helper_rsm(cpu_env);
8418 #endif /* CONFIG_USER_ONLY */
8419         gen_eob(s);
8420         break;
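         /*
          * POPCNT is F3 0F B8: the F3 prefix is mandatory, and LOCK or F2
          * make it #UD.  CC_OP_POPCNT derives ZF from the source operand
          * saved in cc_src.
          */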
8421     case 0x1b8: /* SSE4.2 popcnt */
8422         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8423              PREFIX_REPZ)
8424             goto illegal_op;
8425         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8426             goto illegal_op;
8427 
8428         modrm = x86_ldub_code(env, s);
8429         reg = ((modrm >> 3) & 7) | REX_R(s);
8430 
8431         if (s->prefix & PREFIX_DATA) {
8432             ot = MO_16;
8433         } else {
8434             ot = mo_64_32(dflag);
8435         }
8436 
8437         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8438         gen_extu(ot, s->T0);
8439         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8440         tcg_gen_ctpop_tl(s->T0, s->T0);
8441         gen_op_mov_reg_v(s, ot, reg, s->T0);
8442 
8443         set_cc_op(s, CC_OP_POPCNT);
8444         break;
8445     case 0x10e ... 0x10f:
8446         /* 3DNow! instructions, ignore prefixes */
8447         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8448         /* fall through */
8449     case 0x110 ... 0x117:
8450     case 0x128 ... 0x12f:
8451     case 0x138 ... 0x13a:
8452     case 0x150 ... 0x179:
8453     case 0x17c ... 0x17f:
8454     case 0x1c2:
8455     case 0x1c4 ... 0x1c6:
8456     case 0x1d0 ... 0x1fe:
8457         gen_sse(env, s, b, pc_start);
8458         break;
8459     default:
8460         goto unknown_op;
8461     }
8462     return s->pc;
8463  illegal_op:
8464     gen_illegal_opcode(s);
8465     return s->pc;
8466  unknown_op:
8467     gen_unknown_opcode(env, s);
8468     return s->pc;
8469 }
8470 
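     /*
      * tcg_x86_init() registers the fixed TCG globals (cpu_regs, the
      * segment bases, the BND registers and the CC state), mapping each
      * CPUX86State field to a named TCG value the translator uses
      * directly.
      */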
8471 void tcg_x86_init(void)
8472 {
8473     static const char reg_names[CPU_NB_REGS][4] = {
8474 #ifdef TARGET_X86_64
8475         [R_EAX] = "rax",
8476         [R_EBX] = "rbx",
8477         [R_ECX] = "rcx",
8478         [R_EDX] = "rdx",
8479         [R_ESI] = "rsi",
8480         [R_EDI] = "rdi",
8481         [R_EBP] = "rbp",
8482         [R_ESP] = "rsp",
8483         [8]  = "r8",
8484         [9]  = "r9",
8485         [10] = "r10",
8486         [11] = "r11",
8487         [12] = "r12",
8488         [13] = "r13",
8489         [14] = "r14",
8490         [15] = "r15",
8491 #else
8492         [R_EAX] = "eax",
8493         [R_EBX] = "ebx",
8494         [R_ECX] = "ecx",
8495         [R_EDX] = "edx",
8496         [R_ESI] = "esi",
8497         [R_EDI] = "edi",
8498         [R_EBP] = "ebp",
8499         [R_ESP] = "esp",
8500 #endif
8501     };
8502     static const char seg_base_names[6][8] = {
8503         [R_CS] = "cs_base",
8504         [R_DS] = "ds_base",
8505         [R_ES] = "es_base",
8506         [R_FS] = "fs_base",
8507         [R_GS] = "gs_base",
8508         [R_SS] = "ss_base",
8509     };
8510     static const char bnd_regl_names[4][8] = {
8511         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8512     };
8513     static const char bnd_regu_names[4][8] = {
8514         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8515     };
8516     int i;
8517 
8518     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8519                                        offsetof(CPUX86State, cc_op), "cc_op");
8520     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8521                                     "cc_dst");
8522     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8523                                     "cc_src");
8524     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8525                                      "cc_src2");
8526 
8527     for (i = 0; i < CPU_NB_REGS; ++i) {
8528         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8529                                          offsetof(CPUX86State, regs[i]),
8530                                          reg_names[i]);
8531     }
8532 
8533     for (i = 0; i < 6; ++i) {
8534         cpu_seg_base[i]
8535             = tcg_global_mem_new(cpu_env,
8536                                  offsetof(CPUX86State, segs[i].base),
8537                                  seg_base_names[i]);
8538     }
8539 
8540     for (i = 0; i < 4; ++i) {
8541         cpu_bndl[i]
8542             = tcg_global_mem_new_i64(cpu_env,
8543                                      offsetof(CPUX86State, bnd_regs[i].lb),
8544                                      bnd_regl_names[i]);
8545         cpu_bndu[i]
8546             = tcg_global_mem_new_i64(cpu_env,
8547                                      offsetof(CPUX86State, bnd_regs[i].ub),
8548                                      bnd_regu_names[i]);
8549     }
8550 }
8551 
8552 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8553 {
8554     DisasContext *dc = container_of(dcbase, DisasContext, base);
8555     CPUX86State *env = cpu->env_ptr;
8556     uint32_t flags = dc->base.tb->flags;
8557     uint32_t cflags = tb_cflags(dc->base.tb);
8558     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8559     int iopl = (flags >> IOPL_SHIFT) & 3;
8560 
8561     dc->cs_base = dc->base.tb->cs_base;
8562     dc->flags = flags;
8563 #ifndef CONFIG_USER_ONLY
8564     dc->cpl = cpl;
8565     dc->iopl = iopl;
8566 #endif
8567 
8568     /* We make some simplifying assumptions; validate they're correct. */
8569     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8570     g_assert(CPL(dc) == cpl);
8571     g_assert(IOPL(dc) == iopl);
8572     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8573     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8574     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8575     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8576     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8577     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8578     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8579     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8580 
8581     dc->cc_op = CC_OP_DYNAMIC;
8582     dc->cc_op_dirty = false;
8583     dc->popl_esp_hack = 0;
8584     /* select memory access functions */
8585     dc->mem_index = 0;
8586 #ifdef CONFIG_SOFTMMU
8587     dc->mem_index = cpu_mmu_index(env, false);
8588 #endif
8589     dc->cpuid_features = env->features[FEAT_1_EDX];
8590     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8591     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8592     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8593     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8594     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8595     dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
8596                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8597     /*
8598      * If jmp_opt, we want to handle each string instruction individually.
8599      * For icount also disable repz optimization so that each iteration
8600      * is accounted separately.
8601      */
8602     dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
8603 
8604     dc->T0 = tcg_temp_new();
8605     dc->T1 = tcg_temp_new();
8606     dc->A0 = tcg_temp_new();
8607 
8608     dc->tmp0 = tcg_temp_new();
8609     dc->tmp1_i64 = tcg_temp_new_i64();
8610     dc->tmp2_i32 = tcg_temp_new_i32();
8611     dc->tmp3_i32 = tcg_temp_new_i32();
8612     dc->tmp4 = tcg_temp_new();
8613     dc->ptr0 = tcg_temp_new_ptr();
8614     dc->ptr1 = tcg_temp_new_ptr();
8615     dc->cc_srcT = tcg_temp_local_new();
8616 }
8617 
8618 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8619 {
8620 }
8621 
8622 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8623 {
8624     DisasContext *dc = container_of(dcbase, DisasContext, base);
8625 
8626     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8627 }
8628 
8629 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8630 {
8631     DisasContext *dc = container_of(dcbase, DisasContext, base);
8632     target_ulong pc_next;
8633 
8634 #ifdef TARGET_VSYSCALL_PAGE
8635     /*
8636      * Detect entry into the vsyscall page and invoke the syscall.
8637      */
8638     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8639         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8640         dc->base.pc_next = dc->pc + 1;
8641         return;
8642     }
8643 #endif
8644 
8645     pc_next = disas_insn(dc, cpu);
8646 
8647     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8648         /* In single-step mode, we generate only one instruction and
8649            then raise an exception. */
8650         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8651            the flag and abort the translation to give the IRQs a
8652            chance to happen. */
8653         dc->base.is_jmp = DISAS_TOO_MANY;
8654     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8655                && ((pc_next & TARGET_PAGE_MASK)
8656                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8657                        & TARGET_PAGE_MASK)
8658                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8659         /* Do not cross a page boundary in icount mode, since doing so
8660            can raise an exception.  Stop only when the boundary would be
8661            crossed by the next instruction in the block; if the current
8662            instruction already crossed it, that is fine, because no
8663            exception has stopped this code.
8664          */
8665         dc->base.is_jmp = DISAS_TOO_MANY;
8666     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8667         dc->base.is_jmp = DISAS_TOO_MANY;
8668     }
8669 
8670     dc->base.pc_next = pc_next;
8671 }
8672 
8673 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8674 {
8675     DisasContext *dc = container_of(dcbase, DisasContext, base);
8676 
8677     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8678         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8679         gen_eob(dc);
8680     }
8681 }
8682 
8683 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8684                               CPUState *cpu)
8685 {
8686     DisasContext *dc = container_of(dcbase, DisasContext, base);
8687 
8688     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8689     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8690 }
8691 
8692 static const TranslatorOps i386_tr_ops = {
8693     .init_disas_context = i386_tr_init_disas_context,
8694     .tb_start           = i386_tr_tb_start,
8695     .insn_start         = i386_tr_insn_start,
8696     .translate_insn     = i386_tr_translate_insn,
8697     .tb_stop            = i386_tr_tb_stop,
8698     .disas_log          = i386_tr_disas_log,
8699 };
8700 
8701 /* generate intermediate code for basic block 'tb'.  */
8702 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8703 {
8704     DisasContext dc;
8705 
8706     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8707 }
8708 
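     /*
      * The two values recorded by tcg_gen_insn_start() come back here on
      * exception unwind: data[0] is pc (eip + cs_base) and data[1] is the
      * cc_op in effect at the start of the faulting instruction.
      */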
8709 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8710                           target_ulong *data)
8711 {
8712     int cc_op = data[1];
8713     env->eip = data[0] - tb->cs_base;
8714     if (cc_op != CC_OP_DYNAMIC) {
8715         env->cc_op = cc_op;
8716     }
8717 }
8718