/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
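
/*
 * A ModRM byte splits as mod(2):op/reg(3):rm(3).  Illustrative example
 * (added, not from the original source): the byte 0xB8 decodes as
 * mod = 2, op = 7, rm = 0, so it is matched by the mod == 2 arm of
 * CASE_MODRM_MEM_OP(7); only CASE_MODRM_OP additionally matches the
 * mod == 3 (register operand) encodings.
 */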

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif
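
/*
 * For reference (an added note, not in the original source): a REX
 * prefix byte has the layout 0100WRXB, so e.g. 0x48 sets only REX.W
 * (64-bit operand size), while 0x44 sets only REX.R, extending the
 * ModRM reg field to select r8-r15.
 */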

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need neither sprinkle
 * ifdefs through the translator nor provide the helper function.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }
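
/*
 * As an illustration (added, not part of the original source), the
 * invocation STUB_HELPER(clgi, TCGv_env env) below expands to:
 *
 *   static inline void gen_helper_clgi(TCGv_env env)
 *   { qemu_build_not_reached(); }
 *
 * so a user-only build compiles any dead call site without needing the
 * real sysemu helper.
 */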

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
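
/*
 * An illustrative reading of the table above (added, not in the
 * original source): after a logic op (CC_OP_LOGICB...CC_OP_LOGICQ) only
 * CC_DST is live, because Z/S/P can be recomputed from the result alone
 * and C/O are architecturally zero; a SUB additionally needs the
 * original left operand, which is why it is the only class that keeps
 * cc_srcT live.
 */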

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if HOST_BIG_ENDIAN
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}
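
/*
 * Concretely (an added example, not from the original source): with no
 * REX prefix, byte register number 4 names AH (bits 15..8 of EAX); with
 * any REX prefix present, the same encoding names SPL (bits 7..0 of
 * RSP), so byte_reg_is_xH() returns false and the plain low-byte path
 * is used.
 */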

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}
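
/*
 * Worked example (added for illustration): the IN opcodes 0xE4/0xE5
 * differ only in bit 0, so mo_b_d32(0xE4, ot) yields MO_8 (IN AL, imm8)
 * while mo_b_d32(0xE5, ot) yields MO_16 or MO_32 depending on the data
 * size - never MO_64, since port I/O is at most 32 bits wide.
 */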

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
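
/*
 * An added worked example: in 16-bit code with a segment base of
 * 0x10000 (default or overridden), an effective address of 0xFFFF is
 * first zero-extended to 16 bits, then the segment base is added and
 * the sum truncated to 32 bits, giving A0 = 0x1FFFF; only the MO_64
 * path skips that final 32-bit wrap.
 */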

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP or a VMM exit if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}
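
/*
 * Added note: in protected mode the CPU consults the TSS I/O permission
 * bitmap whenever CPL > IOPL (or in vm86 mode), which is exactly the
 * condition guarded above; the check_io helper is expected to raise #GP
 * itself when the bitmap denies the port, so translation continues as
 * if the access were allowed.
 */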

static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
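
/*
 * Added for illustration: a CCPrepare describes a condition without
 * materializing it.  For example, "carry set" after CC_OP_EFLAGS is
 * returned below as
 *
 *   (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_C }
 *
 * meaning "true iff (cpu_cc_src & CC_C) != 0"; the consumer decides
 * whether to fold that into a setcond, a brcond, or a movcond.
 */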

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
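
/*
 * Added note: each REP iteration is translated as its own trip through
 * the instruction, e.g. GEN_REPZ(movs) defines gen_repz_movs(), which
 * emits one MOVS step, decrements ECX, optionally exits early when ECX
 * reaches zero (s->repz_opt), and then jumps back to cur_eip to
 * re-enter the same instruction; GEN_REPZ2 additionally tests ZF for
 * the REPZ/REPNZ termination rule.
 */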

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE: the "r" op ordering is the exception here - fsub/fsubr and
   fdiv/fdivr are swapped relative to gen_helper_fp_arith_ST0_FT0. */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means a memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
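
/*
 * Added note on the LOCK paths above: TCG has no atomic subtract, so
 * SBB/SUB are expressed as an atomic add of the negated value; e.g.
 * "lock sub" stores the fetched old destination in CC_SRCT via
 * tcg_gen_atomic_fetch_add_tl and then reconstructs the result as
 * old - src from that fetched value.
 */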

/* if d == OR_TMP0, it means a memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
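
/*
 * Added note: INC/DEC architecturally leave CF untouched, which is why
 * the pre-existing carry is materialized into CC_SRC above before
 * switching to CC_OP_INCB/CC_OP_DECB; the flag helpers (defined
 * elsewhere) are then expected to recompute everything except CF from
 * CC_DST and take CF verbatim from CC_SRC.
 */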

static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
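
/*
 * Added note: the movcond pairs above implement the x86 rule that a
 * shift by a count of zero leaves EFLAGS (and thus CC_OP) completely
 * unchanged - the new result/shm1/CC_OP values are committed only when
 * count != 0, otherwise the previous CC state is kept bit for bit.
 */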

static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non-zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}
1644 
1645 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646 {
1647     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648     TCGv_i32 t0, t1;
1649 
1650     /* load */
1651     if (op1 == OR_TMP0) {
1652         gen_op_ld_v(s, ot, s->T0, s->A0);
1653     } else {
1654         gen_op_mov_v_reg(s, ot, s->T0, op1);
1655     }
1656 
1657     tcg_gen_andi_tl(s->T1, s->T1, mask);
1658 
1659     switch (ot) {
1660     case MO_8:
1661         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1662         tcg_gen_ext8u_tl(s->T0, s->T0);
1663         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664         goto do_long;
1665     case MO_16:
1666         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1667         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668         goto do_long;
1669     do_long:
1670 #ifdef TARGET_X86_64
1671     case MO_32:
1672         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674         if (is_right) {
1675             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676         } else {
1677             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678         }
1679         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680         break;
1681 #endif
1682     default:
1683         if (is_right) {
1684             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685         } else {
1686             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687         }
1688         break;
1689     }
1690 
1691     /* store */
1692     gen_op_st_rm_T0_A0(s, ot, op1);
1693 
1694     /* We'll need the flags computed into CC_SRC.  */
1695     gen_compute_eflags(s);
1696 
1697     /* The value that was "rotated out" is now present at the other end
1698        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1699        since we've computed the flags into CC_SRC, these variables are
1700        currently dead.  */
1701     if (is_right) {
1702         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705     } else {
1706         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708     }
1709     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1711 
1712     /* Now conditionally store the new CC_OP value.  If the shift count
1713        is 0, we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1714        Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1715        exactly as we computed above.  */
1716     t0 = tcg_const_i32(0);
1717     t1 = tcg_temp_new_i32();
1718     tcg_gen_trunc_tl_i32(t1, s->T1);
1719     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722                         s->tmp2_i32, s->tmp3_i32);
1723     tcg_temp_free_i32(t0);
1724     tcg_temp_free_i32(t1);
1725 
1726     /* The CC_OP value is no longer predictable.  */
1727     set_cc_op(s, CC_OP_DYNAMIC);
1728 }
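/*
 * Illustrative example: "rolb $1, %al" with AL = 0x80 yields 0x01.
 * Since the 8-bit input was replicated to 32 bits above, bit 0 of
 * the result is the bit rotated out (so CC_DST = CF = 1) and bit 31
 * mirrors the new sign bit, so CC_SRC2 = OF = MSB ^ CF = 1, matching
 * the x86 rule for left rotates.
 */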
1729 
1730 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731                           int is_right)
1732 {
1733     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734     int shift;
1735 
1736     /* load */
1737     if (op1 == OR_TMP0) {
1738         gen_op_ld_v(s, ot, s->T0, s->A0);
1739     } else {
1740         gen_op_mov_v_reg(s, ot, s->T0, op1);
1741     }
1742 
1743     op2 &= mask;
1744     if (op2 != 0) {
1745         switch (ot) {
1746 #ifdef TARGET_X86_64
1747         case MO_32:
1748             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749             if (is_right) {
1750                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751             } else {
1752                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753             }
1754             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755             break;
1756 #endif
1757         default:
1758             if (is_right) {
1759                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760             } else {
1761                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762             }
1763             break;
1764         case MO_8:
1765             mask = 7;
1766             goto do_shifts;
1767         case MO_16:
1768             mask = 15;
1769         do_shifts:
1770             shift = op2 & mask;
1771             if (is_right) {
1772                 shift = mask + 1 - shift;
1773             }
1774             gen_extu(ot, s->T0);
1775             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778             break;
1779         }
1780     }
1781 
1782     /* store */
1783     gen_op_st_rm_T0_A0(s, ot, op1);
1784 
1785     if (op2 != 0) {
1786         /* Compute the flags into CC_SRC.  */
1787         gen_compute_eflags(s);
1788 
1789         /* The value that was "rotated out" is now present at the other end
1790            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1791            since we've computed the flags into CC_SRC, these variables are
1792            currently dead.  */
1793         if (is_right) {
1794             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797         } else {
1798             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800         }
1801         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803         set_cc_op(s, CC_OP_ADCOX);
1804     }
1805 }
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
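/*
 * RCL and RCR rotate through the carry flag: an N-bit operand
 * effectively performs an (N+1)-bit rotate of CF:value, which has no
 * direct TCG equivalent, so the work is left to out-of-line helpers.
 * The gen_compute_eflags() call above guarantees the helpers see an
 * up-to-date CF.
 */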
1862 
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                              bool is_right, TCGv count_in)
1866 {
1867     target_ulong mask = (ot == MO_64 ? 63 : 31);
1868     TCGv count;
1869 
1870     /* load */
1871     if (op1 == OR_TMP0) {
1872         gen_op_ld_v(s, ot, s->T0, s->A0);
1873     } else {
1874         gen_op_mov_v_reg(s, ot, s->T0, op1);
1875     }
1876 
1877     count = tcg_temp_new();
1878     tcg_gen_andi_tl(count, count_in, mask);
1879 
1880     switch (ot) {
1881     case MO_16:
1882         /* Note: we implement the Intel behaviour for shift count > 16.
1883            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884            portion by constructing it as a 32-bit value.  */
1885         if (is_right) {
1886             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887             tcg_gen_mov_tl(s->T1, s->T0);
1888             tcg_gen_mov_tl(s->T0, s->tmp0);
1889         } else {
1890             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891         }
1892         /*
1893          * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1894          * otherwise fall through to the default case.
1895          */
1896     case MO_32:
1897 #ifdef TARGET_X86_64
1898         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899         tcg_gen_subi_tl(s->tmp0, count, 1);
1900         if (is_right) {
1901             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903             tcg_gen_shr_i64(s->T0, s->T0, count);
1904         } else {
1905             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907             tcg_gen_shl_i64(s->T0, s->T0, count);
1908             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909             tcg_gen_shri_i64(s->T0, s->T0, 32);
1910         }
1911         break;
1912 #endif
1913     default:
1914         tcg_gen_subi_tl(s->tmp0, count, 1);
1915         if (is_right) {
1916             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917 
1918             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919             tcg_gen_shr_tl(s->T0, s->T0, count);
1920             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921         } else {
1922             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923             if (ot == MO_16) {
1924                 /* Only needed if count > 16, for Intel behaviour.  */
1925                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928             }
1929 
1930             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931             tcg_gen_shl_tl(s->T0, s->T0, count);
1932             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933         }
1934         tcg_gen_movi_tl(s->tmp4, 0);
1935         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                            s->tmp4, s->T1);
1937         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938         break;
1939     }
1940 
1941     /* store */
1942     gen_op_st_rm_T0_A0(s, ot, op1);
1943 
1944     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945     tcg_temp_free(count);
1946 }
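/*
 * Illustrative example: "shld $8, %ebx, %eax" with EAX = 0x12345678
 * and EBX = 0x9abcdef0 produces EAX = 0x3456789a: the destination
 * shifts left while the top bits of the source shift in from the
 * right.  When TARGET_X86_64 is defined, the concatenation above
 * computes this with a single wide shift.
 */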
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
2005 #define X86_MAX_INSN_LENGTH 15
2006 
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009     uint64_t pc = s->pc;
2010 
2011     s->pc += num_bytes;
2012     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013         /* If the instruction's 16th byte is on a different page than the 1st, a
2014          * page fault on the second page wins over the general protection fault
2015          * caused by the instruction being too long.
2016          * This can happen even if the operand is only one byte long!
2017          */
2018         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019             volatile uint8_t unused =
2020                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021             (void) unused;
2022         }
2023         siglongjmp(s->jmpbuf, 1);
2024     }
2025 
2026     return pc;
2027 }
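/*
 * Illustrative scenario: an instruction that starts a few bytes
 * before a page boundary can only exceed the 15-byte limit after the
 * fetch pointer has crossed into the next page.  The dummy
 * cpu_ldub_code() above probes that page first, so an unmapped page
 * yields #PF; only if the probe succeeds does the siglongjmp unwind
 * the decoder so that the over-long instruction faults instead.
 */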
2028 
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031     return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2032 }
2033 
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036     return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
2037 }
2038 
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2042 }
2043 
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046     return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2047 }
2048 
2049 #ifdef TARGET_X86_64
2050 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2051 {
2052     return translator_ldq(env, &s->base, advance_pc(env, s, 8));
2053 }
2054 #endif
2055 
2056 /* Decompose an address.  */
2057 
2058 typedef struct AddressParts {
2059     int def_seg;
2060     int base;
2061     int index;
2062     int scale;
2063     target_long disp;
2064 } AddressParts;
2065 
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                     int modrm)
2068 {
2069     int def_seg, base, index, scale, mod, rm;
2070     target_long disp;
2071     bool havesib;
2072 
2073     def_seg = R_DS;
2074     index = -1;
2075     scale = 0;
2076     disp = 0;
2077 
2078     mod = (modrm >> 6) & 3;
2079     rm = modrm & 7;
2080     base = rm | REX_B(s);
2081 
2082     if (mod == 3) {
2083         /* Normally filtered out earlier, but including this path
2084            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085         goto done;
2086     }
2087 
2088     switch (s->aflag) {
2089     case MO_64:
2090     case MO_32:
2091         havesib = 0;
2092         if (rm == 4) {
2093             int code = x86_ldub_code(env, s);
2094             scale = (code >> 6) & 3;
2095             index = ((code >> 3) & 7) | REX_X(s);
2096             if (index == 4) {
2097                 index = -1;  /* no index */
2098             }
2099             base = (code & 7) | REX_B(s);
2100             havesib = 1;
2101         }
2102 
2103         switch (mod) {
2104         case 0:
2105             if ((base & 7) == 5) {
2106                 base = -1;
2107                 disp = (int32_t)x86_ldl_code(env, s);
2108                 if (CODE64(s) && !havesib) {
2109                     base = -2;
2110                     disp += s->pc + s->rip_offset;
2111                 }
2112             }
2113             break;
2114         case 1:
2115             disp = (int8_t)x86_ldub_code(env, s);
2116             break;
2117         default:
2118         case 2:
2119             disp = (int32_t)x86_ldl_code(env, s);
2120             break;
2121         }
2122 
2123         /* For correct popl handling with esp.  */
2124         if (base == R_ESP && s->popl_esp_hack) {
2125             disp += s->popl_esp_hack;
2126         }
2127         if (base == R_EBP || base == R_ESP) {
2128             def_seg = R_SS;
2129         }
2130         break;
2131 
2132     case MO_16:
2133         if (mod == 0) {
2134             if (rm == 6) {
2135                 base = -1;
2136                 disp = x86_lduw_code(env, s);
2137                 break;
2138             }
2139         } else if (mod == 1) {
2140             disp = (int8_t)x86_ldub_code(env, s);
2141         } else {
2142             disp = (int16_t)x86_lduw_code(env, s);
2143         }
2144 
2145         switch (rm) {
2146         case 0:
2147             base = R_EBX;
2148             index = R_ESI;
2149             break;
2150         case 1:
2151             base = R_EBX;
2152             index = R_EDI;
2153             break;
2154         case 2:
2155             base = R_EBP;
2156             index = R_ESI;
2157             def_seg = R_SS;
2158             break;
2159         case 3:
2160             base = R_EBP;
2161             index = R_EDI;
2162             def_seg = R_SS;
2163             break;
2164         case 4:
2165             base = R_ESI;
2166             break;
2167         case 5:
2168             base = R_EDI;
2169             break;
2170         case 6:
2171             base = R_EBP;
2172             def_seg = R_SS;
2173             break;
2174         default:
2175         case 7:
2176             base = R_EBX;
2177             break;
2178         }
2179         break;
2180 
2181     default:
2182         tcg_abort();
2183     }
2184 
2185  done:
2186     return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
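/*
 * Worked example (for illustration): in 32-bit code, the bytes
 * "8b 44 9e 10" (mov eax, [esi+ebx*4+0x10]) arrive here with
 * modrm = 0x44 (mod=1, reg=0, rm=4 -> SIB byte follows); the SIB
 * byte 0x9e gives scale=2, index=EBX, base=ESI, and the trailing
 * disp8 is 0x10.  The result is { .def_seg = R_DS, .base = R_ESI,
 * .index = R_EBX, .scale = 2, .disp = 0x10 }.
 */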
2188 
2189 /* Compute the address, with a minimum number of TCG ops.  */
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192     TCGv ea = NULL;
2193 
2194     if (a.index >= 0) {
2195         if (a.scale == 0) {
2196             ea = cpu_regs[a.index];
2197         } else {
2198             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199             ea = s->A0;
2200         }
2201         if (a.base >= 0) {
2202             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203             ea = s->A0;
2204         }
2205     } else if (a.base >= 0) {
2206         ea = cpu_regs[a.base];
2207     }
2208     if (!ea) {
2209         tcg_gen_movi_tl(s->A0, a.disp);
2210         ea = s->A0;
2211     } else if (a.disp != 0) {
2212         tcg_gen_addi_tl(s->A0, ea, a.disp);
2213         ea = s->A0;
2214     }
2215 
2216     return ea;
2217 }
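/*
 * The common cases cost nothing: a bare [reg] address returns
 * cpu_regs[reg] directly with no TCG ops, and each of scaling,
 * base addition and displacement adds at most one op when present.
 */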
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230 
2231 /* Used for BNDCL, BNDCU, BNDCN.  */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                       TCGCond cond, TCGv_i64 bndv)
2234 {
2235     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236 
2237     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238     if (!CODE64(s)) {
2239         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240     }
2241     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243     gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245 
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251 
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253    OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287     uint32_t ret;
2288 
2289     switch (ot) {
2290     case MO_8:
2291         ret = x86_ldub_code(env, s);
2292         break;
2293     case MO_16:
2294         ret = x86_lduw_code(env, s);
2295         break;
2296     case MO_32:
2297 #ifdef TARGET_X86_64
2298     case MO_64:
2299 #endif
2300         ret = x86_ldl_code(env, s);
2301         break;
2302     default:
2303         tcg_abort();
2304     }
2305     return ret;
2306 }
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
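/*
 * Note that even at 64-bit operand size, immediates stay 4 bytes:
 * x86 sign-extends imm32 to 64 bits (the mov reg, imm64 form is
 * fetched separately via x86_ldq_code), hence the cap at 4 here and
 * the shared MO_32/MO_64 case in insn_get() above.
 */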
2316 
2317 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318 {
2319     target_ulong pc = s->cs_base + eip;
2320 
2321     if (translator_use_goto_tb(&s->base, pc))  {
2322         /* jump to same page: we can use a direct jump */
2323         tcg_gen_goto_tb(tb_num);
2324         gen_jmp_im(s, eip);
2325         tcg_gen_exit_tb(s->base.tb, tb_num);
2326         s->base.is_jmp = DISAS_NORETURN;
2327     } else {
2328         /* jump to another page */
2329         gen_jmp_im(s, eip);
2330         gen_jr(s, s->tmp0);
2331     }
2332 }
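/*
 * Direct block chaining: the goto_tb/exit_tb pair emits an exit that
 * can later be patched to branch straight into the successor TB.
 * That is only valid while both TBs translate the same guest page,
 * which is what translator_use_goto_tb() checks; otherwise EIP is
 * stored and the jump goes through the indirect lookup in gen_jr().
 */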
2333 
2334 static inline void gen_jcc(DisasContext *s, int b,
2335                            target_ulong val, target_ulong next_eip)
2336 {
2337     TCGLabel *l1, *l2;
2338 
2339     if (s->jmp_opt) {
2340         l1 = gen_new_label();
2341         gen_jcc1(s, b, l1);
2342 
2343         gen_goto_tb(s, 0, next_eip);
2344 
2345         gen_set_label(l1);
2346         gen_goto_tb(s, 1, val);
2347     } else {
2348         l1 = gen_new_label();
2349         l2 = gen_new_label();
2350         gen_jcc1(s, b, l1);
2351 
2352         gen_jmp_im(s, next_eip);
2353         tcg_gen_br(l2);
2354 
2355         gen_set_label(l1);
2356         gen_jmp_im(s, val);
2357         gen_set_label(l2);
2358         gen_eob(s);
2359     }
2360 }
2361 
2362 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                         int modrm, int reg)
2364 {
2365     CCPrepare cc;
2366 
2367     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368 
2369     cc = gen_prepare_cc(s, b, s->T1);
2370     if (cc.mask != -1) {
2371         TCGv t0 = tcg_temp_new();
2372         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373         cc.reg = t0;
2374     }
2375     if (!cc.use_reg2) {
2376         cc.reg2 = tcg_const_tl(cc.imm);
2377     }
2378 
2379     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                        s->T0, cpu_regs[reg]);
2381     gen_op_mov_reg_v(s, ot, reg, s->T0);
2382 
2383     if (cc.mask != -1) {
2384         tcg_temp_free(cc.reg);
2385     }
2386     if (!cc.use_reg2) {
2387         tcg_temp_free(cc.reg2);
2388     }
2389 }
2390 
2391 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392 {
2393     tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                      offsetof(CPUX86State,segs[seg_reg].selector));
2395 }
2396 
2397 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398 {
2399     tcg_gen_ext16u_tl(s->T0, s->T0);
2400     tcg_gen_st32_tl(s->T0, cpu_env,
2401                     offsetof(CPUX86State,segs[seg_reg].selector));
2402     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403 }
2404 
2405 /* move T0 to seg_reg and compute if the CPU state may change. Never
2406    call this function with seg_reg == R_CS */
2407 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408 {
2409     if (PE(s) && !VM86(s)) {
2410         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412         /* abort translation because the addseg value may change or
2413            because ss32 may change. For R_SS, translation must always
2414            stop, as special handling is needed to disable hardware
2415            interrupts for the next instruction */
2416         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417             s->base.is_jmp = DISAS_TOO_MANY;
2418         }
2419     } else {
2420         gen_op_movl_seg_T0_vm(s, seg_reg);
2421         if (seg_reg == R_SS) {
2422             s->base.is_jmp = DISAS_TOO_MANY;
2423         }
2424     }
2425 }
2426 
2427 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428 {
2429     /* no SVM activated; fast case */
2430     if (likely(!GUEST(s))) {
2431         return;
2432     }
2433     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434 }
2435 
2436 static inline void gen_stack_update(DisasContext *s, int addend)
2437 {
2438     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439 }
2440 
2441 /* Generate a push. It depends on ss32, addseg and dflag.  */
2442 static void gen_push_v(DisasContext *s, TCGv val)
2443 {
2444     MemOp d_ot = mo_pushpop(s, s->dflag);
2445     MemOp a_ot = mo_stacksize(s);
2446     int size = 1 << d_ot;
2447     TCGv new_esp = s->A0;
2448 
2449     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450 
2451     if (!CODE64(s)) {
2452         if (ADDSEG(s)) {
2453             new_esp = s->tmp4;
2454             tcg_gen_mov_tl(new_esp, s->A0);
2455         }
2456         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457     }
2458 
2459     gen_op_st_v(s, d_ot, val, s->A0);
2460     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461 }
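/*
 * Illustrative example: a 32-bit "push %eax" with ESP = 0x1000
 * stores at 0xffc and then sets ESP = 0xffc.  When ADDSEG applies,
 * gen_lea_v_seg() rewrites A0 into a linear address, so the raw
 * offset is saved in tmp4 first: ESP must be updated with the
 * offset, not the segment-adjusted address.
 */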
2462 
2463 /* two-step pop is necessary for precise exceptions */
2464 static MemOp gen_pop_T0(DisasContext *s)
2465 {
2466     MemOp d_ot = mo_pushpop(s, s->dflag);
2467 
2468     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470 
2471     return d_ot;
2472 }
2473 
2474 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475 {
2476     gen_stack_update(s, 1 << ot);
2477 }
2478 
2479 static inline void gen_stack_A0(DisasContext *s)
2480 {
2481     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482 }
2483 
2484 static void gen_pusha(DisasContext *s)
2485 {
2486     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487     MemOp d_ot = s->dflag;
2488     int size = 1 << d_ot;
2489     int i;
2490 
2491     for (i = 0; i < 8; i++) {
2492         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495     }
2496 
2497     gen_stack_update(s, -8 * size);
2498 }
2499 
2500 static void gen_popa(DisasContext *s)
2501 {
2502     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503     MemOp d_ot = s->dflag;
2504     int size = 1 << d_ot;
2505     int i;
2506 
2507     for (i = 0; i < 8; i++) {
2508         /* ESP is not reloaded */
2509         if (7 - i == R_ESP) {
2510             continue;
2511         }
2512         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516     }
2517 
2518     gen_stack_update(s, 8 * size);
2519 }
2520 
2521 static void gen_enter(DisasContext *s, int esp_addend, int level)
2522 {
2523     MemOp d_ot = mo_pushpop(s, s->dflag);
2524     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525     int size = 1 << d_ot;
2526 
2527     /* Push BP; compute FrameTemp into T1.  */
2528     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531 
2532     level &= 31;
2533     if (level != 0) {
2534         int i;
2535 
2536         /* Copy level-1 pointers from the previous frame.  */
2537         for (i = 1; i < level; ++i) {
2538             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541 
2542             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545         }
2546 
2547         /* Push the current FrameTemp as the last level.  */
2548         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550         gen_op_st_v(s, d_ot, s->T1, s->A0);
2551     }
2552 
2553     /* Copy the FrameTemp value to EBP.  */
2554     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555 
2556     /* Compute the final value of ESP.  */
2557     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559 }
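/*
 * This follows the architectural ENTER pseudocode, roughly:
 *     Push(EBP); FrameTemp = ESP;
 *     for (i = 1; i < level; i++)
 *         Push(load(EBP - i * size));
 *     if (level != 0)
 *         Push(FrameTemp);
 *     EBP = FrameTemp;
 *     ESP = FrameTemp - esp_addend - size * level;
 * with the stores expressed directly instead of as individual pushes.
 */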
2560 
2561 static void gen_leave(DisasContext *s)
2562 {
2563     MemOp d_ot = mo_pushpop(s, s->dflag);
2564     MemOp a_ot = mo_stacksize(s);
2565 
2566     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568 
2569     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570 
2571     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573 }
2574 
2575 /* Similarly, except that the assumption here is that we don't decode
2576    the instruction at all -- either a missing opcode, an unimplemented
2577    feature, or just a bogus instruction stream.  */
2578 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579 {
2580     gen_illegal_opcode(s);
2581 
2582     if (qemu_loglevel_mask(LOG_UNIMP)) {
2583         FILE *logfile = qemu_log_trylock();
2584         if (logfile) {
2585             target_ulong pc = s->pc_start, end = s->pc;
2586 
2587             fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc);
2588             for (; pc < end; ++pc) {
2589                 fprintf(logfile, " %02x", cpu_ldub_code(env, pc));
2590             }
2591             fprintf(logfile, "\n");
2592             qemu_log_unlock(logfile);
2593         }
2594     }
2595 }
2596 
2597 /* an interrupt is different from an exception because of the
2598    privilege checks */
2599 static void gen_interrupt(DisasContext *s, int intno,
2600                           target_ulong cur_eip, target_ulong next_eip)
2601 {
2602     gen_update_cc_op(s);
2603     gen_jmp_im(s, cur_eip);
2604     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2605                                tcg_const_i32(next_eip - cur_eip));
2606     s->base.is_jmp = DISAS_NORETURN;
2607 }
2608 
2609 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2610 {
2611     if ((s->flags & mask) == 0) {
2612         TCGv_i32 t = tcg_temp_new_i32();
2613         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614         tcg_gen_ori_i32(t, t, mask);
2615         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2616         tcg_temp_free_i32(t);
2617         s->flags |= mask;
2618     }
2619 }
2620 
2621 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2622 {
2623     if (s->flags & mask) {
2624         TCGv_i32 t = tcg_temp_new_i32();
2625         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626         tcg_gen_andi_i32(t, t, ~mask);
2627         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2628         tcg_temp_free_i32(t);
2629         s->flags &= ~mask;
2630     }
2631 }
2632 
2633 /* Clear BND registers during legacy branches.  */
2634 static void gen_bnd_jmp(DisasContext *s)
2635 {
2636     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2637        and if the BNDREGs are known to be in use (non-zero) already.
2638        The helper itself will check BNDPRESERVE at runtime.  */
2639     if ((s->prefix & PREFIX_REPNZ) == 0
2640         && (s->flags & HF_MPX_EN_MASK) != 0
2641         && (s->flags & HF_MPX_IU_MASK) != 0) {
2642         gen_helper_bnd_jmp(cpu_env);
2643     }
2644 }
2645 
2646 /* Generate an end of block. Trace exception is also generated if needed.
2647    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2648    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2649    S->TF.  This is used by the syscall/sysret insns.  */
2650 static void
2651 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2652 {
2653     gen_update_cc_op(s);
2654 
2655     /* If several instructions disable interrupts, only the first does it.  */
2656     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2657         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2658     } else {
2659         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2660     }
2661 
2662     if (s->base.tb->flags & HF_RF_MASK) {
2663         gen_helper_reset_rf(cpu_env);
2664     }
2665     if (recheck_tf) {
2666         gen_helper_rechecking_single_step(cpu_env);
2667         tcg_gen_exit_tb(NULL, 0);
2668     } else if (s->flags & HF_TF_MASK) {
2669         gen_helper_single_step(cpu_env);
2670     } else if (jr) {
2671         tcg_gen_lookup_and_goto_ptr();
2672     } else {
2673         tcg_gen_exit_tb(NULL, 0);
2674     }
2675     s->base.is_jmp = DISAS_NORETURN;
2676 }
2677 
2678 static inline void
2679 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2680 {
2681     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2682 }
2683 
2684 /* End of block.
2685    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2686 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2687 {
2688     gen_eob_worker(s, inhibit, false);
2689 }
2690 
2691 /* End of block, resetting the inhibit irq flag.  */
2692 static void gen_eob(DisasContext *s)
2693 {
2694     gen_eob_worker(s, false, false);
2695 }
2696 
2697 /* Jump to register */
2698 static void gen_jr(DisasContext *s, TCGv dest)
2699 {
2700     do_gen_eob_worker(s, false, false, true);
2701 }
2702 
2703 /* generate a jump to eip. No segment change must happen before this,
2704    as a direct jump to the next block may occur */
2705 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2706 {
2707     gen_update_cc_op(s);
2708     set_cc_op(s, CC_OP_DYNAMIC);
2709     if (s->jmp_opt) {
2710         gen_goto_tb(s, tb_num, eip);
2711     } else {
2712         gen_jmp_im(s, eip);
2713         gen_eob(s);
2714     }
2715 }
2716 
2717 static void gen_jmp(DisasContext *s, target_ulong eip)
2718 {
2719     gen_jmp_tb(s, eip, 0);
2720 }
2721 
2722 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2723 {
2724     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2725     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2726 }
2727 
2728 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2729 {
2730     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2731     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2732 }
2733 
2734 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2735 {
2736     int mem_index = s->mem_index;
2737     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2738     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2739     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2740     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2741     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2742 }
2743 
2744 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2745 {
2746     int mem_index = s->mem_index;
2747     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2748     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2749     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2750     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2751     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2752 }
2753 
2754 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2755 {
2756     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2758     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2759     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2760 }
2761 
2762 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2763 {
2764     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2765     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2766 }
2767 
2768 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2769 {
2770     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2771     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2772 }
2773 
2774 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2775 {
2776     tcg_gen_movi_i64(s->tmp1_i64, 0);
2777     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2778 }
2779 
2780 #define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
2781 
2782 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2783 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2784 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2785 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2786 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2787 typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2788                                TCGv_ptr reg_c);
2789 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2790                                TCGv_i32 val);
2791 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2792 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2793                                TCGv val);
2794 
2795 #define SSE_OPF_CMP       (1 << 1) /* does not write to the first operand */
2796 #define SSE_OPF_SPECIAL   (1 << 3) /* magic */
2797 #define SSE_OPF_3DNOW     (1 << 4) /* 3DNow! instruction */
2798 #define SSE_OPF_MMX       (1 << 5) /* MMX/integer/AVX2 instruction */
2799 #define SSE_OPF_SCALAR    (1 << 6) /* Has SSE scalar variants */
2800 #define SSE_OPF_SHUF      (1 << 9) /* pshufx/shufpx */
2801 
2802 #define OP(op, flags, a, b, c, d)       \
2803     {flags, {{.op = a}, {.op = b}, {.op = c}, {.op = d} } }
2804 
2805 #define MMX_OP(x) OP(op1, SSE_OPF_MMX, \
2806         gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL)
2807 
2808 #define SSE_FOP(name) OP(op1, SSE_OPF_SCALAR, \
2809         gen_helper_##name##ps##_xmm, gen_helper_##name##pd##_xmm, \
2810         gen_helper_##name##ss, gen_helper_##name##sd)
2811 #define SSE_OP(sname, dname, op, flags) OP(op, flags, \
2812         gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL)
2813 
2814 typedef union SSEFuncs {
2815     SSEFunc_0_epp op1;
2816     SSEFunc_0_ppi op1i;
2817     SSEFunc_0_eppt op1t;
2818 } SSEFuncs;
2819 
2820 struct SSEOpHelper_table1 {
2821     int flags;
2822     SSEFuncs fn[4];
2823 };
2824 
2825 #define SSE_3DNOW { SSE_OPF_3DNOW }
2826 #define SSE_SPECIAL { SSE_OPF_SPECIAL }
2827 
2828 static const struct SSEOpHelper_table1 sse_op_table1[256] = {
2829     /* 3DNow! extensions */
2830     [0x0e] = SSE_SPECIAL, /* femms */
2831     [0x0f] = SSE_3DNOW, /* pf... (sse_op_table5) */
2832     /* pure SSE operations */
2833     [0x10] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
2834     [0x11] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
2835     [0x12] = SSE_SPECIAL, /* movlps, movlpd, movsldup, movddup */
2836     [0x13] = SSE_SPECIAL, /* movlps, movlpd */
2837     [0x14] = SSE_OP(punpckldq, punpcklqdq, op1, 0), /* unpcklps, unpcklpd */
2838     [0x15] = SSE_OP(punpckhdq, punpckhqdq, op1, 0), /* unpckhps, unpckhpd */
2839     [0x16] = SSE_SPECIAL, /* movhps, movhpd, movshdup */
2840     [0x17] = SSE_SPECIAL, /* movhps, movhpd */
2841 
2842     [0x28] = SSE_SPECIAL, /* movaps, movapd */
2843     [0x29] = SSE_SPECIAL, /* movaps, movapd */
2844     [0x2a] = SSE_SPECIAL, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2845     [0x2b] = SSE_SPECIAL, /* movntps, movntpd, movntss, movntsd */
2846     [0x2c] = SSE_SPECIAL, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2847     [0x2d] = SSE_SPECIAL, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2848     [0x2e] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR,
2849             gen_helper_ucomiss, gen_helper_ucomisd, NULL, NULL),
2850     [0x2f] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR,
2851             gen_helper_comiss, gen_helper_comisd, NULL, NULL),
2852     [0x50] = SSE_SPECIAL, /* movmskps, movmskpd */
2853     [0x51] = OP(op1, SSE_OPF_SCALAR,
2854                 gen_helper_sqrtps_xmm, gen_helper_sqrtpd_xmm,
2855                 gen_helper_sqrtss, gen_helper_sqrtsd),
2856     [0x52] = OP(op1, SSE_OPF_SCALAR,
2857                 gen_helper_rsqrtps_xmm, NULL, gen_helper_rsqrtss, NULL),
2858     [0x53] = OP(op1, SSE_OPF_SCALAR,
2859                 gen_helper_rcpps_xmm, NULL, gen_helper_rcpss, NULL),
2860     [0x54] = SSE_OP(pand, pand, op1, 0), /* andps, andpd */
2861     [0x55] = SSE_OP(pandn, pandn, op1, 0), /* andnps, andnpd */
2862     [0x56] = SSE_OP(por, por, op1, 0), /* orps, orpd */
2863     [0x57] = SSE_OP(pxor, pxor, op1, 0), /* xorps, xorpd */
2864     [0x58] = SSE_FOP(add),
2865     [0x59] = SSE_FOP(mul),
2866     [0x5a] = OP(op1, SSE_OPF_SCALAR,
2867                 gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm,
2868                 gen_helper_cvtss2sd, gen_helper_cvtsd2ss),
2869     [0x5b] = OP(op1, 0,
2870                 gen_helper_cvtdq2ps_xmm, gen_helper_cvtps2dq_xmm,
2871                 gen_helper_cvttps2dq_xmm, NULL),
2872     [0x5c] = SSE_FOP(sub),
2873     [0x5d] = SSE_FOP(min),
2874     [0x5e] = SSE_FOP(div),
2875     [0x5f] = SSE_FOP(max),
2876 
2877     [0xc2] = SSE_FOP(cmpeq), /* sse_op_table4 */
2878     [0xc6] = SSE_OP(shufps, shufpd, op1i, SSE_OPF_SHUF),
2879 
2880     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2881     [0x38] = SSE_SPECIAL,
2882     [0x3a] = SSE_SPECIAL,
2883 
2884     /* MMX ops and their SSE extensions */
2885     [0x60] = MMX_OP(punpcklbw),
2886     [0x61] = MMX_OP(punpcklwd),
2887     [0x62] = MMX_OP(punpckldq),
2888     [0x63] = MMX_OP(packsswb),
2889     [0x64] = MMX_OP(pcmpgtb),
2890     [0x65] = MMX_OP(pcmpgtw),
2891     [0x66] = MMX_OP(pcmpgtl),
2892     [0x67] = MMX_OP(packuswb),
2893     [0x68] = MMX_OP(punpckhbw),
2894     [0x69] = MMX_OP(punpckhwd),
2895     [0x6a] = MMX_OP(punpckhdq),
2896     [0x6b] = MMX_OP(packssdw),
2897     [0x6c] = OP(op1, SSE_OPF_MMX,
2898                 NULL, gen_helper_punpcklqdq_xmm, NULL, NULL),
2899     [0x6d] = OP(op1, SSE_OPF_MMX,
2900                 NULL, gen_helper_punpckhqdq_xmm, NULL, NULL),
2901     [0x6e] = SSE_SPECIAL, /* movd mm, ea */
2902     [0x6f] = SSE_SPECIAL, /* movq, movdqa, , movdqu */
2903     [0x70] = OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX,
2904             gen_helper_pshufw_mmx, gen_helper_pshufd_xmm,
2905             gen_helper_pshufhw_xmm, gen_helper_pshuflw_xmm),
2906     [0x71] = SSE_SPECIAL, /* shiftw */
2907     [0x72] = SSE_SPECIAL, /* shiftd */
2908     [0x73] = SSE_SPECIAL, /* shiftq */
2909     [0x74] = MMX_OP(pcmpeqb),
2910     [0x75] = MMX_OP(pcmpeqw),
2911     [0x76] = MMX_OP(pcmpeql),
2912     [0x77] = SSE_SPECIAL, /* emms */
2913     [0x78] = SSE_SPECIAL, /* extrq_i, insertq_i (sse4a) */
2914     [0x79] = OP(op1, 0,
2915             NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r),
2916     [0x7c] = OP(op1, 0,
2917                 NULL, gen_helper_haddpd_xmm, NULL, gen_helper_haddps_xmm),
2918     [0x7d] = OP(op1, 0,
2919                 NULL, gen_helper_hsubpd_xmm, NULL, gen_helper_hsubps_xmm),
2920     [0x7e] = SSE_SPECIAL, /* movd, movd, , movq */
2921     [0x7f] = SSE_SPECIAL, /* movq, movdqa, movdqu */
2922     [0xc4] = SSE_SPECIAL, /* pinsrw */
2923     [0xc5] = SSE_SPECIAL, /* pextrw */
2924     [0xd0] = OP(op1, 0,
2925                 NULL, gen_helper_addsubpd_xmm, NULL, gen_helper_addsubps_xmm),
2926     [0xd1] = MMX_OP(psrlw),
2927     [0xd2] = MMX_OP(psrld),
2928     [0xd3] = MMX_OP(psrlq),
2929     [0xd4] = MMX_OP(paddq),
2930     [0xd5] = MMX_OP(pmullw),
2931     [0xd6] = SSE_SPECIAL,
2932     [0xd7] = SSE_SPECIAL, /* pmovmskb */
2933     [0xd8] = MMX_OP(psubusb),
2934     [0xd9] = MMX_OP(psubusw),
2935     [0xda] = MMX_OP(pminub),
2936     [0xdb] = MMX_OP(pand),
2937     [0xdc] = MMX_OP(paddusb),
2938     [0xdd] = MMX_OP(paddusw),
2939     [0xde] = MMX_OP(pmaxub),
2940     [0xdf] = MMX_OP(pandn),
2941     [0xe0] = MMX_OP(pavgb),
2942     [0xe1] = MMX_OP(psraw),
2943     [0xe2] = MMX_OP(psrad),
2944     [0xe3] = MMX_OP(pavgw),
2945     [0xe4] = MMX_OP(pmulhuw),
2946     [0xe5] = MMX_OP(pmulhw),
2947     [0xe6] = OP(op1, 0,
2948             NULL, gen_helper_cvttpd2dq_xmm,
2949             gen_helper_cvtdq2pd_xmm, gen_helper_cvtpd2dq_xmm),
2950     [0xe7] = SSE_SPECIAL,  /* movntq, movntdq */
2951     [0xe8] = MMX_OP(psubsb),
2952     [0xe9] = MMX_OP(psubsw),
2953     [0xea] = MMX_OP(pminsw),
2954     [0xeb] = MMX_OP(por),
2955     [0xec] = MMX_OP(paddsb),
2956     [0xed] = MMX_OP(paddsw),
2957     [0xee] = MMX_OP(pmaxsw),
2958     [0xef] = MMX_OP(pxor),
2959     [0xf0] = SSE_SPECIAL, /* lddqu */
2960     [0xf1] = MMX_OP(psllw),
2961     [0xf2] = MMX_OP(pslld),
2962     [0xf3] = MMX_OP(psllq),
2963     [0xf4] = MMX_OP(pmuludq),
2964     [0xf5] = MMX_OP(pmaddwd),
2965     [0xf6] = MMX_OP(psadbw),
2966     [0xf7] = OP(op1t, SSE_OPF_MMX,
2967                 gen_helper_maskmov_mmx, gen_helper_maskmov_xmm, NULL, NULL),
2968     [0xf8] = MMX_OP(psubb),
2969     [0xf9] = MMX_OP(psubw),
2970     [0xfa] = MMX_OP(psubl),
2971     [0xfb] = MMX_OP(psubq),
2972     [0xfc] = MMX_OP(paddb),
2973     [0xfd] = MMX_OP(paddw),
2974     [0xfe] = MMX_OP(paddl),
2975 };
2976 #undef MMX_OP
2977 #undef OP
2978 #undef SSE_FOP
2979 #undef SSE_OP
2980 #undef SSE_SPECIAL
2981 
2982 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2983 
2984 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2985     [0 + 2] = MMX_OP2(psrlw),
2986     [0 + 4] = MMX_OP2(psraw),
2987     [0 + 6] = MMX_OP2(psllw),
2988     [8 + 2] = MMX_OP2(psrld),
2989     [8 + 4] = MMX_OP2(psrad),
2990     [8 + 6] = MMX_OP2(pslld),
2991     [16 + 2] = MMX_OP2(psrlq),
2992     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2993     [16 + 6] = MMX_OP2(psllq),
2994     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2995 };
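/*
 * This table backs the 0f 71/72/73 immediate-shift groups: rows 0/8/16
 * correspond to word/dword/qword granularity, the low three bits come
 * from the ModRM reg field (/2 = shift right logical, /4 = arithmetic,
 * /6 = shift left), and the column selects the MMX or XMM helper.  The
 * psrldq/pslldq byte shifts exist only in the XMM column.
 */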
2996 
2997 static const SSEFunc_0_epi sse_op_table3ai[] = {
2998     gen_helper_cvtsi2ss,
2999     gen_helper_cvtsi2sd
3000 };
3001 
3002 #ifdef TARGET_X86_64
3003 static const SSEFunc_0_epl sse_op_table3aq[] = {
3004     gen_helper_cvtsq2ss,
3005     gen_helper_cvtsq2sd
3006 };
3007 #endif
3008 
3009 static const SSEFunc_i_ep sse_op_table3bi[] = {
3010     gen_helper_cvttss2si,
3011     gen_helper_cvtss2si,
3012     gen_helper_cvttsd2si,
3013     gen_helper_cvtsd2si
3014 };
3015 
3016 #ifdef TARGET_X86_64
3017 static const SSEFunc_l_ep sse_op_table3bq[] = {
3018     gen_helper_cvttss2sq,
3019     gen_helper_cvtss2sq,
3020     gen_helper_cvttsd2sq,
3021     gen_helper_cvtsd2sq
3022 };
3023 #endif
3024 
3025 #define SSE_CMP(x) { \
3026     gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \
3027     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd}
3028 static const SSEFunc_0_epp sse_op_table4[8][4] = {
3029     SSE_CMP(cmpeq),
3030     SSE_CMP(cmplt),
3031     SSE_CMP(cmple),
3032     SSE_CMP(cmpunord),
3033     SSE_CMP(cmpneq),
3034     SSE_CMP(cmpnlt),
3035     SSE_CMP(cmpnle),
3036     SSE_CMP(cmpord),
3037 };
3038 #undef SSE_CMP
3039 
3040 static const SSEFunc_0_epp sse_op_table5[256] = {
3041     [0x0c] = gen_helper_pi2fw,
3042     [0x0d] = gen_helper_pi2fd,
3043     [0x1c] = gen_helper_pf2iw,
3044     [0x1d] = gen_helper_pf2id,
3045     [0x8a] = gen_helper_pfnacc,
3046     [0x8e] = gen_helper_pfpnacc,
3047     [0x90] = gen_helper_pfcmpge,
3048     [0x94] = gen_helper_pfmin,
3049     [0x96] = gen_helper_pfrcp,
3050     [0x97] = gen_helper_pfrsqrt,
3051     [0x9a] = gen_helper_pfsub,
3052     [0x9e] = gen_helper_pfadd,
3053     [0xa0] = gen_helper_pfcmpgt,
3054     [0xa4] = gen_helper_pfmax,
3055     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3056     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3057     [0xaa] = gen_helper_pfsubr,
3058     [0xae] = gen_helper_pfacc,
3059     [0xb0] = gen_helper_pfcmpeq,
3060     [0xb4] = gen_helper_pfmul,
3061     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3062     [0xb7] = gen_helper_pmulhrw_mmx,
3063     [0xbb] = gen_helper_pswapd,
3064     [0xbf] = gen_helper_pavgb_mmx,
3065 };
3066 
3067 struct SSEOpHelper_table6 {
3068     SSEFuncs fn[2];
3069     uint32_t ext_mask;
3070     int flags;
3071 };
3072 
3073 struct SSEOpHelper_table7 {
3074     union {
3075         SSEFunc_0_eppi op1;
3076     } fn[2];
3077     uint32_t ext_mask;
3078     int flags;
3079 };
3080 
3081 #define gen_helper_special_xmm NULL
3082 
3083 #define OP(name, op, flags, ext, mmx_name) \
3084     {{{.op = mmx_name}, {.op = gen_helper_ ## name ## _xmm} }, \
3085         CPUID_EXT_ ## ext, flags}
3086 #define BINARY_OP_MMX(name, ext) \
3087     OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
3088 #define BINARY_OP(name, ext, flags) \
3089     OP(name, op1, flags, ext, NULL)
3090 #define UNARY_OP_MMX(name, ext) \
3091     OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
3092 #define UNARY_OP(name, ext, flags) \
3093     OP(name, op1, flags, ext, NULL)
3094 #define BLENDV_OP(name, ext, flags) OP(name, op1, 0, ext, NULL)
3095 #define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP, ext, NULL)
3096 #define SPECIAL_OP(ext) OP(special, op1, SSE_OPF_SPECIAL, ext, NULL)
3097 
3098 /* prefix [66] 0f 38 */
3099 static const struct SSEOpHelper_table6 sse_op_table6[256] = {
3100     [0x00] = BINARY_OP_MMX(pshufb, SSSE3),
3101     [0x01] = BINARY_OP_MMX(phaddw, SSSE3),
3102     [0x02] = BINARY_OP_MMX(phaddd, SSSE3),
3103     [0x03] = BINARY_OP_MMX(phaddsw, SSSE3),
3104     [0x04] = BINARY_OP_MMX(pmaddubsw, SSSE3),
3105     [0x05] = BINARY_OP_MMX(phsubw, SSSE3),
3106     [0x06] = BINARY_OP_MMX(phsubd, SSSE3),
3107     [0x07] = BINARY_OP_MMX(phsubsw, SSSE3),
3108     [0x08] = BINARY_OP_MMX(psignb, SSSE3),
3109     [0x09] = BINARY_OP_MMX(psignw, SSSE3),
3110     [0x0a] = BINARY_OP_MMX(psignd, SSSE3),
3111     [0x0b] = BINARY_OP_MMX(pmulhrsw, SSSE3),
3112     [0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
3113     [0x14] = BLENDV_OP(blendvps, SSE41, 0),
3114     [0x15] = BLENDV_OP(blendvpd, SSE41, 0),
3115     [0x17] = CMP_OP(ptest, SSE41),
3116     [0x1c] = UNARY_OP_MMX(pabsb, SSSE3),
3117     [0x1d] = UNARY_OP_MMX(pabsw, SSSE3),
3118     [0x1e] = UNARY_OP_MMX(pabsd, SSSE3),
3119     [0x20] = UNARY_OP(pmovsxbw, SSE41, SSE_OPF_MMX),
3120     [0x21] = UNARY_OP(pmovsxbd, SSE41, SSE_OPF_MMX),
3121     [0x22] = UNARY_OP(pmovsxbq, SSE41, SSE_OPF_MMX),
3122     [0x23] = UNARY_OP(pmovsxwd, SSE41, SSE_OPF_MMX),
3123     [0x24] = UNARY_OP(pmovsxwq, SSE41, SSE_OPF_MMX),
3124     [0x25] = UNARY_OP(pmovsxdq, SSE41, SSE_OPF_MMX),
3125     [0x28] = BINARY_OP(pmuldq, SSE41, SSE_OPF_MMX),
3126     [0x29] = BINARY_OP(pcmpeqq, SSE41, SSE_OPF_MMX),
3127     [0x2a] = SPECIAL_OP(SSE41), /* movntdqa */
3128     [0x2b] = BINARY_OP(packusdw, SSE41, SSE_OPF_MMX),
3129     [0x30] = UNARY_OP(pmovzxbw, SSE41, SSE_OPF_MMX),
3130     [0x31] = UNARY_OP(pmovzxbd, SSE41, SSE_OPF_MMX),
3131     [0x32] = UNARY_OP(pmovzxbq, SSE41, SSE_OPF_MMX),
3132     [0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX),
3133     [0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX),
3134     [0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX),
3135     [0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX),
3136     [0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX),
3137     [0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX),
3138     [0x3a] = BINARY_OP(pminuw, SSE41, SSE_OPF_MMX),
3139     [0x3b] = BINARY_OP(pminud, SSE41, SSE_OPF_MMX),
3140     [0x3c] = BINARY_OP(pmaxsb, SSE41, SSE_OPF_MMX),
3141     [0x3d] = BINARY_OP(pmaxsd, SSE41, SSE_OPF_MMX),
3142     [0x3e] = BINARY_OP(pmaxuw, SSE41, SSE_OPF_MMX),
3143     [0x3f] = BINARY_OP(pmaxud, SSE41, SSE_OPF_MMX),
3144     [0x40] = BINARY_OP(pmulld, SSE41, SSE_OPF_MMX),
3145     [0x41] = UNARY_OP(phminposuw, SSE41, 0),
3146     [0xdb] = UNARY_OP(aesimc, AES, 0),
3147     [0xdc] = BINARY_OP(aesenc, AES, 0),
3148     [0xdd] = BINARY_OP(aesenclast, AES, 0),
3149     [0xde] = BINARY_OP(aesdec, AES, 0),
3150     [0xdf] = BINARY_OP(aesdeclast, AES, 0),
3151 };
3152 
3153 /* prefix [66] 0f 3a */
3154 static const struct SSEOpHelper_table7 sse_op_table7[256] = {
3155     [0x08] = UNARY_OP(roundps, SSE41, 0),
3156     [0x09] = UNARY_OP(roundpd, SSE41, 0),
3157     [0x0a] = UNARY_OP(roundss, SSE41, SSE_OPF_SCALAR),
3158     [0x0b] = UNARY_OP(roundsd, SSE41, SSE_OPF_SCALAR),
3159     [0x0c] = BINARY_OP(blendps, SSE41, 0),
3160     [0x0d] = BINARY_OP(blendpd, SSE41, 0),
3161     [0x0e] = BINARY_OP(pblendw, SSE41, SSE_OPF_MMX),
3162     [0x0f] = BINARY_OP_MMX(palignr, SSSE3),
3163     [0x14] = SPECIAL_OP(SSE41), /* pextrb */
3164     [0x15] = SPECIAL_OP(SSE41), /* pextrw */
3165     [0x16] = SPECIAL_OP(SSE41), /* pextrd/pextrq */
3166     [0x17] = SPECIAL_OP(SSE41), /* extractps */
3167     [0x20] = SPECIAL_OP(SSE41), /* pinsrb */
3168     [0x21] = SPECIAL_OP(SSE41), /* insertps */
3169     [0x22] = SPECIAL_OP(SSE41), /* pinsrd/pinsrq */
3170     [0x40] = BINARY_OP(dpps, SSE41, 0),
3171     [0x41] = BINARY_OP(dppd, SSE41, 0),
3172     [0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX),
3173     [0x44] = BINARY_OP(pclmulqdq, PCLMULQDQ, 0),
3174     [0x60] = CMP_OP(pcmpestrm, SSE42),
3175     [0x61] = CMP_OP(pcmpestri, SSE42),
3176     [0x62] = CMP_OP(pcmpistrm, SSE42),
3177     [0x63] = CMP_OP(pcmpistri, SSE42),
3178     [0xdf] = UNARY_OP(aeskeygenassist, AES, 0),
3179 };
3180 
3181 #undef OP
3182 #undef BINARY_OP_MMX
3183 #undef BINARY_OP
3184 #undef UNARY_OP_MMX
3185 #undef UNARY_OP
3186 #undef BLENDV_OP
3187 #undef SPECIAL_OP
3188 
3189 /* VEX prefix not allowed */
3190 #define CHECK_NO_VEX(s) do { \
3191     if (s->prefix & PREFIX_VEX) \
3192         goto illegal_op; \
3193     } while (0)
3194 
3195 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3196                     target_ulong pc_start)
3197 {
3198     int b1, op1_offset, op2_offset, is_xmm, val;
3199     int modrm, mod, rm, reg;
3200     int sse_op_flags;
3201     SSEFuncs sse_op_fn;
3202     const struct SSEOpHelper_table6 *op6;
3203     const struct SSEOpHelper_table7 *op7;
3204     MemOp ot;
3205 
3206     b &= 0xff;
3207     if (s->prefix & PREFIX_DATA)
3208         b1 = 1;
3209     else if (s->prefix & PREFIX_REPZ)
3210         b1 = 2;
3211     else if (s->prefix & PREFIX_REPNZ)
3212         b1 = 3;
3213     else
3214         b1 = 0;
3215     sse_op_flags = sse_op_table1[b].flags;
3216     sse_op_fn = sse_op_table1[b].fn[b1];
3217     if ((sse_op_flags & (SSE_OPF_SPECIAL | SSE_OPF_3DNOW)) == 0
3218             && !sse_op_fn.op1) {
3219         goto unknown_op;
3220     }
3221     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3222         is_xmm = 1;
3223     } else {
3224         if (b1 == 0) {
3225             /* MMX case */
3226             is_xmm = 0;
3227         } else {
3228             is_xmm = 1;
3229         }
3230     }
3231     if (sse_op_flags & SSE_OPF_3DNOW) {
3232         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3233             goto illegal_op;
3234         }
3235     }
3236     /* simple MMX/SSE operation */
3237     if (s->flags & HF_TS_MASK) {
3238         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3239         return;
3240     }
3241     if (s->flags & HF_EM_MASK) {
3242     illegal_op:
3243         gen_illegal_opcode(s);
3244         return;
3245     }
3246     if (is_xmm
3247         && !(s->flags & HF_OSFXSR_MASK)
3248         && (b != 0x38 && b != 0x3a)) {
3249         goto unknown_op;
3250     }
3251     if (b == 0x0e) {
3252         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3253             /* If we were fully decoding this we might use illegal_op.  */
3254             goto unknown_op;
3255         }
3256         /* femms */
3257         gen_helper_emms(cpu_env);
3258         return;
3259     }
3260     if (b == 0x77) {
3261         /* emms */
3262         gen_helper_emms(cpu_env);
3263         return;
3264     }
3265     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3266        the static cpu state) */
3267     if (!is_xmm) {
3268         gen_helper_enter_mmx(cpu_env);
3269     }
3270 
3271     modrm = x86_ldub_code(env, s);
3272     reg = ((modrm >> 3) & 7);
3273     if (is_xmm) {
3274         reg |= REX_R(s);
3275     }
3276     mod = (modrm >> 6) & 3;
3277     if (sse_op_flags & SSE_OPF_SPECIAL) {
3278         b |= (b1 << 8);
        switch (b) {
3280         case 0x0e7: /* movntq */
3281             CHECK_NO_VEX(s);
3282             if (mod == 3) {
3283                 goto illegal_op;
3284             }
3285             gen_lea_modrm(env, s, modrm);
3286             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3287             break;
3288         case 0x1e7: /* movntdq */
3289         case 0x02b: /* movntps */
        case 0x12b: /* movntpd */
            if (mod == 3) {
                goto illegal_op;
            }
3293             gen_lea_modrm(env, s, modrm);
3294             gen_sto_env_A0(s, ZMM_OFFSET(reg));
3295             break;
3296         case 0x3f0: /* lddqu */
            if (mod == 3) {
                goto illegal_op;
            }
3299             gen_lea_modrm(env, s, modrm);
3300             gen_ldo_env_A0(s, ZMM_OFFSET(reg));
3301             break;
3302         case 0x22b: /* movntss */
3303         case 0x32b: /* movntsd */
            if (mod == 3) {
                goto illegal_op;
            }
3306             gen_lea_modrm(env, s, modrm);
3307             if (b1 & 1) {
3308                 gen_stq_env_A0(s, offsetof(CPUX86State,
3309                                            xmm_regs[reg].ZMM_Q(0)));
3310             } else {
3311                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3312                     xmm_regs[reg].ZMM_L(0)));
3313                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3314             }
3315             break;
3316         case 0x6e: /* movd mm, ea */
3317             CHECK_NO_VEX(s);
3318 #ifdef TARGET_X86_64
3319             if (s->dflag == MO_64) {
3320                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3321                 tcg_gen_st_tl(s->T0, cpu_env,
3322                               offsetof(CPUX86State, fpregs[reg].mmx));
3323             } else
3324 #endif
3325             {
3326                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3327                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3328                                  offsetof(CPUX86State,fpregs[reg].mmx));
3329                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3330                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3331             }
3332             break;
3333         case 0x16e: /* movd xmm, ea */
3334 #ifdef TARGET_X86_64
3335             if (s->dflag == MO_64) {
3336                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3337                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
3338                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3339             } else
3340 #endif
3341             {
3342                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3343                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
3344                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3345                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3346             }
3347             break;
3348         case 0x6f: /* movq mm, ea */
3349             CHECK_NO_VEX(s);
3350             if (mod != 3) {
3351                 gen_lea_modrm(env, s, modrm);
3352                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3353             } else {
3354                 rm = (modrm & 7);
3355                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3356                                offsetof(CPUX86State,fpregs[rm].mmx));
3357                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3358                                offsetof(CPUX86State,fpregs[reg].mmx));
3359             }
3360             break;
3361         case 0x010: /* movups */
3362         case 0x110: /* movupd */
3363         case 0x028: /* movaps */
3364         case 0x128: /* movapd */
3365         case 0x16f: /* movdqa xmm, ea */
3366         case 0x26f: /* movdqu xmm, ea */
3367             if (mod != 3) {
3368                 gen_lea_modrm(env, s, modrm);
3369                 gen_ldo_env_A0(s, ZMM_OFFSET(reg));
3370             } else {
3371                 rm = (modrm & 7) | REX_B(s);
3372                 gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm));
3373             }
3374             break;
3375         case 0x210: /* movss xmm, ea */
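            /*
             * A movss load zeroes the upper three lanes of the
             * destination; the register-to-register form below merges
             * only lane 0.
             */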
3376             if (mod != 3) {
3377                 gen_lea_modrm(env, s, modrm);
3378                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3379                 tcg_gen_st32_tl(s->T0, cpu_env,
3380                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3381                 tcg_gen_movi_tl(s->T0, 0);
3382                 tcg_gen_st32_tl(s->T0, cpu_env,
3383                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3384                 tcg_gen_st32_tl(s->T0, cpu_env,
3385                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3386                 tcg_gen_st32_tl(s->T0, cpu_env,
3387                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3388             } else {
3389                 rm = (modrm & 7) | REX_B(s);
3390                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
3391                                offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)));
3392                 tcg_gen_st_i32(s->tmp2_i32, cpu_env,
3393                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3394             }
3395             break;
3396         case 0x310: /* movsd xmm, ea */
3397             if (mod != 3) {
3398                 gen_lea_modrm(env, s, modrm);
3399                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3400                                            xmm_regs[reg].ZMM_Q(0)));
3401                 tcg_gen_movi_tl(s->T0, 0);
3402                 tcg_gen_st32_tl(s->T0, cpu_env,
3403                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3404                 tcg_gen_st32_tl(s->T0, cpu_env,
3405                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3406             } else {
3407                 rm = (modrm & 7) | REX_B(s);
3408                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3409                             offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)));
3410             }
3411             break;
3412         case 0x012: /* movlps */
3413         case 0x112: /* movlpd */
3414             if (mod != 3) {
3415                 gen_lea_modrm(env, s, modrm);
3416                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3417                                            xmm_regs[reg].ZMM_Q(0)));
3418             } else {
3419                 /* movhlps */
3420                 rm = (modrm & 7) | REX_B(s);
3421                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3422                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3423             }
3424             break;
3425         case 0x212: /* movsldup */
3426             if (mod != 3) {
3427                 gen_lea_modrm(env, s, modrm);
3428                 gen_ldo_env_A0(s, ZMM_OFFSET(reg));
3429             } else {
3430                 rm = (modrm & 7) | REX_B(s);
3431                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3432                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3433                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3434                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3435             }
3436             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3437                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3438             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3439                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3440             break;
3441         case 0x312: /* movddup */
3442             if (mod != 3) {
3443                 gen_lea_modrm(env, s, modrm);
3444                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3445                                            xmm_regs[reg].ZMM_Q(0)));
3446             } else {
3447                 rm = (modrm & 7) | REX_B(s);
3448                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3449                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3450             }
3451             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3452                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3453             break;
3454         case 0x016: /* movhps */
3455         case 0x116: /* movhpd */
3456             if (mod != 3) {
3457                 gen_lea_modrm(env, s, modrm);
3458                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3459                                            xmm_regs[reg].ZMM_Q(1)));
3460             } else {
3461                 /* movlhps */
3462                 rm = (modrm & 7) | REX_B(s);
3463                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3464                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3465             }
3466             break;
3467         case 0x216: /* movshdup */
3468             if (mod != 3) {
3469                 gen_lea_modrm(env, s, modrm);
3470                 gen_ldo_env_A0(s, ZMM_OFFSET(reg));
3471             } else {
3472                 rm = (modrm & 7) | REX_B(s);
3473                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3474                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3475                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3476                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3477             }
3478             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3479                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3480             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3481                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3482             break;
3483         case 0x178:
3484         case 0x378:
3485             CHECK_NO_VEX(s);
3486             {
3487                 int bit_index, field_length;
3488 
                if (b1 == 1 && reg != 0) {
                    goto illegal_op;
                }
3491                 field_length = x86_ldub_code(env, s) & 0x3F;
3492                 bit_index = x86_ldub_code(env, s) & 0x3F;
3493                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
                if (b1 == 1) {
                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                       tcg_const_i32(bit_index),
                                       tcg_const_i32(field_length));
                } else {
                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                         tcg_const_i32(bit_index),
                                         tcg_const_i32(field_length));
                }
3502             }
3503             break;
3504         case 0x7e: /* movd ea, mm */
3505             CHECK_NO_VEX(s);
3506 #ifdef TARGET_X86_64
3507             if (s->dflag == MO_64) {
3508                 tcg_gen_ld_i64(s->T0, cpu_env,
3509                                offsetof(CPUX86State,fpregs[reg].mmx));
3510                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3511             } else
3512 #endif
3513             {
3514                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3515                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3516                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3517             }
3518             break;
3519         case 0x17e: /* movd ea, xmm */
3520 #ifdef TARGET_X86_64
3521             if (s->dflag == MO_64) {
3522                 tcg_gen_ld_i64(s->T0, cpu_env,
3523                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3524                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3525             } else
3526 #endif
3527             {
3528                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3529                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3530                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3531             }
3532             break;
3533         case 0x27e: /* movq xmm, ea */
3534             if (mod != 3) {
3535                 gen_lea_modrm(env, s, modrm);
3536                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3537                                            xmm_regs[reg].ZMM_Q(0)));
3538             } else {
3539                 rm = (modrm & 7) | REX_B(s);
3540                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3541                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3542             }
3543             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3544             break;
3545         case 0x7f: /* movq ea, mm */
3546             CHECK_NO_VEX(s);
3547             if (mod != 3) {
3548                 gen_lea_modrm(env, s, modrm);
3549                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3550             } else {
3551                 rm = (modrm & 7);
3552                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3553                             offsetof(CPUX86State,fpregs[reg].mmx));
3554             }
3555             break;
3556         case 0x011: /* movups */
3557         case 0x111: /* movupd */
3558         case 0x029: /* movaps */
3559         case 0x129: /* movapd */
3560         case 0x17f: /* movdqa ea, xmm */
3561         case 0x27f: /* movdqu ea, xmm */
3562             if (mod != 3) {
3563                 gen_lea_modrm(env, s, modrm);
3564                 gen_sto_env_A0(s, ZMM_OFFSET(reg));
3565             } else {
3566                 rm = (modrm & 7) | REX_B(s);
3567                 gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg));
3568             }
3569             break;
3570         case 0x211: /* movss ea, xmm */
3571             if (mod != 3) {
3572                 gen_lea_modrm(env, s, modrm);
3573                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3574                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3575                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3576             } else {
3577                 rm = (modrm & 7) | REX_B(s);
3578                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3579                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3580             }
3581             break;
3582         case 0x311: /* movsd ea, xmm */
3583             if (mod != 3) {
3584                 gen_lea_modrm(env, s, modrm);
3585                 gen_stq_env_A0(s, offsetof(CPUX86State,
3586                                            xmm_regs[reg].ZMM_Q(0)));
3587             } else {
3588                 rm = (modrm & 7) | REX_B(s);
3589                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3590                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3591             }
3592             break;
3593         case 0x013: /* movlps */
3594         case 0x113: /* movlpd */
3595             if (mod != 3) {
3596                 gen_lea_modrm(env, s, modrm);
3597                 gen_stq_env_A0(s, offsetof(CPUX86State,
3598                                            xmm_regs[reg].ZMM_Q(0)));
3599             } else {
3600                 goto illegal_op;
3601             }
3602             break;
3603         case 0x017: /* movhps */
3604         case 0x117: /* movhpd */
3605             if (mod != 3) {
3606                 gen_lea_modrm(env, s, modrm);
3607                 gen_stq_env_A0(s, offsetof(CPUX86State,
3608                                            xmm_regs[reg].ZMM_Q(1)));
3609             } else {
3610                 goto illegal_op;
3611             }
3612             break;
3613         case 0x71: /* shift mm, im */
3614         case 0x72:
3615         case 0x73:
3616         case 0x171: /* shift xmm, im */
3617         case 0x172:
3618         case 0x173:
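            /*
             * 0F 71/72/73 ib: shift by immediate.  The reg field of
             * the modrm byte selects the operation (e.g. 0F 71 /2 is
             * psrlw, /4 psraw, /6 psllw); the immediate is staged in
             * mmx_t0/xmm_t0 so the ordinary two-operand table-2 helper
             * can be reused.
             */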
3619             val = x86_ldub_code(env, s);
3620             if (is_xmm) {
3621                 tcg_gen_movi_tl(s->T0, val);
3622                 tcg_gen_st32_tl(s->T0, cpu_env,
3623                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3624                 tcg_gen_movi_tl(s->T0, 0);
3625                 tcg_gen_st32_tl(s->T0, cpu_env,
3626                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3627                 op1_offset = offsetof(CPUX86State,xmm_t0);
3628             } else {
3629                 CHECK_NO_VEX(s);
3630                 tcg_gen_movi_tl(s->T0, val);
3631                 tcg_gen_st32_tl(s->T0, cpu_env,
3632                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3633                 tcg_gen_movi_tl(s->T0, 0);
3634                 tcg_gen_st32_tl(s->T0, cpu_env,
3635                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3636                 op1_offset = offsetof(CPUX86State,mmx_t0);
3637             }
            assert(b1 < 2);
            SSEFunc_0_epp fn = sse_op_table2[((b - 1) & 3) * 8 +
                                             ((modrm >> 3) & 7)][b1];
3641             if (!fn) {
3642                 goto unknown_op;
3643             }
3644             if (is_xmm) {
3645                 rm = (modrm & 7) | REX_B(s);
3646                 op2_offset = ZMM_OFFSET(rm);
3647             } else {
3648                 rm = (modrm & 7);
3649                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3650             }
3651             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3652             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3653             fn(cpu_env, s->ptr0, s->ptr1);
3654             break;
3655         case 0x050: /* movmskps */
3656             rm = (modrm & 7) | REX_B(s);
3657             tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3658             gen_helper_movmskps_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3659             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3660             break;
3661         case 0x150: /* movmskpd */
3662             rm = (modrm & 7) | REX_B(s);
3663             tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3664             gen_helper_movmskpd_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3665             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3666             break;
3667         case 0x02a: /* cvtpi2ps */
3668         case 0x12a: /* cvtpi2pd */
3669             CHECK_NO_VEX(s);
3670             gen_helper_enter_mmx(cpu_env);
3671             if (mod != 3) {
3672                 gen_lea_modrm(env, s, modrm);
3673                 op2_offset = offsetof(CPUX86State,mmx_t0);
3674                 gen_ldq_env_A0(s, op2_offset);
3675             } else {
3676                 rm = (modrm & 7);
3677                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3678             }
3679             op1_offset = ZMM_OFFSET(reg);
3680             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3681             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b >> 8) {
3683             case 0x0:
3684                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3685                 break;
3686             default:
3687             case 0x1:
3688                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3689                 break;
3690             }
3691             break;
3692         case 0x22a: /* cvtsi2ss */
3693         case 0x32a: /* cvtsi2sd */
3694             ot = mo_64_32(s->dflag);
3695             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3696             op1_offset = ZMM_OFFSET(reg);
3697             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3698             if (ot == MO_32) {
3699                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3700                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3701                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3702             } else {
3703 #ifdef TARGET_X86_64
3704                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3705                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3706 #else
3707                 goto illegal_op;
3708 #endif
3709             }
3710             break;
3711         case 0x02c: /* cvttps2pi */
3712         case 0x12c: /* cvttpd2pi */
3713         case 0x02d: /* cvtps2pi */
3714         case 0x12d: /* cvtpd2pi */
3715             CHECK_NO_VEX(s);
3716             gen_helper_enter_mmx(cpu_env);
3717             if (mod != 3) {
3718                 gen_lea_modrm(env, s, modrm);
3719                 op2_offset = offsetof(CPUX86State,xmm_t0);
3720                 gen_ldo_env_A0(s, op2_offset);
3721             } else {
3722                 rm = (modrm & 7) | REX_B(s);
3723                 op2_offset = ZMM_OFFSET(rm);
3724             }
3725             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3726             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3727             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b) {
3729             case 0x02c:
3730                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3731                 break;
3732             case 0x12c:
3733                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3734                 break;
3735             case 0x02d:
3736                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3737                 break;
3738             case 0x12d:
3739                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3740                 break;
3741             }
3742             break;
3743         case 0x22c: /* cvttss2si */
3744         case 0x32c: /* cvttsd2si */
3745         case 0x22d: /* cvtss2si */
3746         case 0x32d: /* cvtsd2si */
3747             ot = mo_64_32(s->dflag);
3748             if (mod != 3) {
3749                 gen_lea_modrm(env, s, modrm);
3750                 if ((b >> 8) & 1) {
3751                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3752                 } else {
3753                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3754                     tcg_gen_st32_tl(s->T0, cpu_env,
3755                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3756                 }
3757                 op2_offset = offsetof(CPUX86State,xmm_t0);
3758             } else {
3759                 rm = (modrm & 7) | REX_B(s);
3760                 op2_offset = ZMM_OFFSET(rm);
3761             }
3762             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3763             if (ot == MO_32) {
3764                 SSEFunc_i_ep sse_fn_i_ep =
3765                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3766                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3767                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3768             } else {
3769 #ifdef TARGET_X86_64
3770                 SSEFunc_l_ep sse_fn_l_ep =
3771                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3772                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3773 #else
3774                 goto illegal_op;
3775 #endif
3776             }
3777             gen_op_mov_reg_v(s, ot, reg, s->T0);
3778             break;
3779         case 0xc4: /* pinsrw */
3780         case 0x1c4:
3781             s->rip_offset = 1;
3782             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3783             val = x86_ldub_code(env, s);
3784             if (b1) {
3785                 val &= 7;
3786                 tcg_gen_st16_tl(s->T0, cpu_env,
3787                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3788             } else {
3789                 CHECK_NO_VEX(s);
3790                 val &= 3;
3791                 tcg_gen_st16_tl(s->T0, cpu_env,
3792                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3793             }
3794             break;
3795         case 0xc5: /* pextrw */
3796         case 0x1c5:
            if (mod != 3) {
                goto illegal_op;
            }
3799             ot = mo_64_32(s->dflag);
3800             val = x86_ldub_code(env, s);
3801             if (b1) {
3802                 val &= 7;
3803                 rm = (modrm & 7) | REX_B(s);
3804                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3805                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3806             } else {
3807                 val &= 3;
3808                 rm = (modrm & 7);
3809                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3810                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3811             }
3812             reg = ((modrm >> 3) & 7) | REX_R(s);
3813             gen_op_mov_reg_v(s, ot, reg, s->T0);
3814             break;
3815         case 0x1d6: /* movq ea, xmm */
3816             if (mod != 3) {
3817                 gen_lea_modrm(env, s, modrm);
3818                 gen_stq_env_A0(s, offsetof(CPUX86State,
3819                                            xmm_regs[reg].ZMM_Q(0)));
3820             } else {
3821                 rm = (modrm & 7) | REX_B(s);
3822                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3823                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3824                 gen_op_movq_env_0(s,
3825                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3826             }
3827             break;
3828         case 0x2d6: /* movq2dq */
3829             CHECK_NO_VEX(s);
3830             gen_helper_enter_mmx(cpu_env);
3831             rm = (modrm & 7);
3832             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3833                         offsetof(CPUX86State,fpregs[rm].mmx));
3834             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3835             break;
3836         case 0x3d6: /* movdq2q */
3837             CHECK_NO_VEX(s);
3838             gen_helper_enter_mmx(cpu_env);
3839             rm = (modrm & 7) | REX_B(s);
3840             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3841                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3842             break;
3843         case 0xd7: /* pmovmskb */
3844         case 0x1d7:
            if (mod != 3) {
                goto illegal_op;
            }
3847             if (b1) {
3848                 rm = (modrm & 7) | REX_B(s);
3849                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3850                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3851             } else {
3852                 CHECK_NO_VEX(s);
3853                 rm = (modrm & 7);
3854                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3855                                  offsetof(CPUX86State, fpregs[rm].mmx));
3856                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3857             }
3858             reg = ((modrm >> 3) & 7) | REX_R(s);
3859             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3860             break;
3861 
3862         case 0x138:
3863         case 0x038:
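            /*
             * Three-byte 0F 38 opcodes.  Since 0F 38 is marked SPECIAL
             * in table 1, the byte fetched as "modrm" above is really
             * the third opcode byte (e.g. 0x17 for 66 0F 38 17, ptest)
             * and the real modrm byte follows.  The 0F 38 Fx block
             * holds the integer extensions (movbe, crc32, BMI) and is
             * dispatched separately below.
             */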
3864             b = modrm;
3865             if ((b & 0xf0) == 0xf0) {
3866                 goto do_0f_38_fx;
3867             }
3868             modrm = x86_ldub_code(env, s);
3869             rm = modrm & 7;
3870             reg = ((modrm >> 3) & 7) | REX_R(s);
3871             mod = (modrm >> 6) & 3;
3872 
3873             assert(b1 < 2);
3874             op6 = &sse_op_table6[b];
3875             if (op6->ext_mask == 0) {
3876                 goto unknown_op;
3877             }
3878             if (!(s->cpuid_ext_features & op6->ext_mask)) {
3879                 goto illegal_op;
3880             }
3881 
3882             if (b1) {
3883                 op1_offset = ZMM_OFFSET(reg);
3884                 if (mod == 3) {
3885                     op2_offset = ZMM_OFFSET(rm | REX_B(s));
3886                 } else {
3887                     op2_offset = offsetof(CPUX86State,xmm_t0);
3888                     gen_lea_modrm(env, s, modrm);
3889                     switch (b) {
3890                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3891                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3892                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3893                         gen_ldq_env_A0(s, op2_offset +
3894                                         offsetof(ZMMReg, ZMM_Q(0)));
3895                         break;
3896                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3897                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3898                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3899                                             s->mem_index, MO_LEUL);
3900                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3901                                         offsetof(ZMMReg, ZMM_L(0)));
3902                         break;
3903                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3904                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3905                                            s->mem_index, MO_LEUW);
3906                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3907                                         offsetof(ZMMReg, ZMM_W(0)));
3908                         break;
                    case 0x2a:            /* movntdqa */
3910                         gen_ldo_env_A0(s, op1_offset);
3911                         return;
3912                     default:
3913                         gen_ldo_env_A0(s, op2_offset);
3914                     }
3915                 }
3916                 if (!op6->fn[b1].op1) {
3917                     goto illegal_op;
3918                 }
3919                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3920                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3921                 op6->fn[b1].op1(cpu_env, s->ptr0, s->ptr1);
3922             } else {
3923                 CHECK_NO_VEX(s);
3924                 if ((op6->flags & SSE_OPF_MMX) == 0) {
3925                     goto unknown_op;
3926                 }
3927                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3928                 if (mod == 3) {
3929                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3930                 } else {
3931                     op2_offset = offsetof(CPUX86State,mmx_t0);
3932                     gen_lea_modrm(env, s, modrm);
3933                     gen_ldq_env_A0(s, op2_offset);
3934                 }
3935                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3936                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3937                 op6->fn[0].op1(cpu_env, s->ptr0, s->ptr1);
3938             }
3939 
3940             if (op6->flags & SSE_OPF_CMP) {
3941                 set_cc_op(s, CC_OP_EFLAGS);
3942             }
3943             break;
3944 
3945         case 0x238:
3946         case 0x338:
3947         do_0f_38_fx:
3948             /* Various integer extensions at 0f 38 f[0-f].  */
3949             b = modrm | (b1 << 8);
3950             modrm = x86_ldub_code(env, s);
3951             reg = ((modrm >> 3) & 7) | REX_R(s);
3952 
3953             switch (b) {
3954             case 0x3f0: /* crc32 Gd,Eb */
3955             case 0x3f1: /* crc32 Gd,Ey */
3956             do_crc32:
3957                 CHECK_NO_VEX(s);
3958                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3959                     goto illegal_op;
3960                 }
3961                 if ((b & 0xff) == 0xf0) {
3962                     ot = MO_8;
3963                 } else if (s->dflag != MO_64) {
3964                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3965                 } else {
3966                     ot = MO_64;
3967                 }
3968 
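                /* 8 << ot turns the MemOp size code into the source
                   width in bits (MO_8 -> 8 ... MO_64 -> 64). */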
3969                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3970                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3971                 gen_helper_crc32(s->T0, s->tmp2_i32,
3972                                  s->T0, tcg_const_i32(8 << ot));
3973 
3974                 ot = mo_64_32(s->dflag);
3975                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3976                 break;
3977 
3978             case 0x1f0: /* crc32 or movbe */
3979             case 0x1f1:
3980                 CHECK_NO_VEX(s);
                /* For these insns, the f2 prefix is supposed to take
                   priority over the 66 prefix, which is not what the b1
                   computation above implements, so check for it here.  */
3984                 if (s->prefix & PREFIX_REPNZ) {
3985                     goto do_crc32;
3986                 }
3987                 /* FALLTHRU */
3988             case 0x0f0: /* movbe Gy,My */
3989             case 0x0f1: /* movbe My,Gy */
3990                 CHECK_NO_VEX(s);
3991                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3992                     goto illegal_op;
3993                 }
3994                 if (s->dflag != MO_64) {
3995                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3996                 } else {
3997                     ot = MO_64;
3998                 }
3999 
4000                 gen_lea_modrm(env, s, modrm);
4001                 if ((b & 1) == 0) {
4002                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
4003                                        s->mem_index, ot | MO_BE);
4004                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4005                 } else {
4006                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
4007                                        s->mem_index, ot | MO_BE);
4008                 }
4009                 break;
4010 
4011             case 0x0f2: /* andn Gy, By, Ey */
4012                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4013                     || !(s->prefix & PREFIX_VEX)
4014                     || s->vex_l != 0) {
4015                     goto illegal_op;
4016                 }
4017                 ot = mo_64_32(s->dflag);
4018                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4019                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
4020                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4021                 gen_op_update1_cc(s);
4022                 set_cc_op(s, CC_OP_LOGICB + ot);
4023                 break;
4024 
4025             case 0x0f7: /* bextr Gy, Ey, By */
4026                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4027                     || !(s->prefix & PREFIX_VEX)
4028                     || s->vex_l != 0) {
4029                     goto illegal_op;
4030                 }
4031                 ot = mo_64_32(s->dflag);
4032                 {
4033                     TCGv bound, zero;
4034 
4035                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4036                     /* Extract START, and shift the operand.
4037                        Shifts larger than operand size get zeros.  */
4038                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
4039                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
4040 
4041                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
4042                     zero = tcg_const_tl(0);
4043                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
4044                                        s->T0, zero);
4045                     tcg_temp_free(zero);
4046 
4047                     /* Extract the LEN into a mask.  Lengths larger than
4048                        operand size get all ones.  */
4049                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
4050                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
4051                                        s->A0, bound);
4052                     tcg_temp_free(bound);
4053                     tcg_gen_movi_tl(s->T1, 1);
4054                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
4055                     tcg_gen_subi_tl(s->T1, s->T1, 1);
4056                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4057 
4058                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4059                     gen_op_update1_cc(s);
4060                     set_cc_op(s, CC_OP_LOGICB + ot);
4061                 }
4062                 break;
4063 
4064             case 0x0f5: /* bzhi Gy, Ey, By */
4065                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4066                     || !(s->prefix & PREFIX_VEX)
4067                     || s->vex_l != 0) {
4068                     goto illegal_op;
4069                 }
4070                 ot = mo_64_32(s->dflag);
4071                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4072                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
4073                 {
4074                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
4075                     /* Note that since we're using BMILG (in order to get O
4076                        cleared) we need to store the inverse into C.  */
4077                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
4078                                        s->T1, bound);
4079                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
4080                                        bound, bound, s->T1);
4081                     tcg_temp_free(bound);
4082                 }
4083                 tcg_gen_movi_tl(s->A0, -1);
4084                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
4085                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
4086                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4087                 gen_op_update1_cc(s);
4088                 set_cc_op(s, CC_OP_BMILGB + ot);
4089                 break;
4090 
4091             case 0x3f6: /* mulx By, Gy, rdx, Ey */
4092                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4093                     || !(s->prefix & PREFIX_VEX)
4094                     || s->vex_l != 0) {
4095                     goto illegal_op;
4096                 }
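                /* mulx: unsigned EDX/RDX * Ey; the low half goes to
                   the vvvv register and the high half to Gy, with the
                   flags left untouched (hence no cc_op update). */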
4097                 ot = mo_64_32(s->dflag);
4098                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4099                 switch (ot) {
4100                 default:
4101                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4102                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
4103                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4104                                       s->tmp2_i32, s->tmp3_i32);
4105                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4106                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4107                     break;
4108 #ifdef TARGET_X86_64
4109                 case MO_64:
4110                     tcg_gen_mulu2_i64(s->T0, s->T1,
4111                                       s->T0, cpu_regs[R_EDX]);
4112                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4113                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4114                     break;
4115 #endif
4116                 }
4117                 break;
4118 
4119             case 0x3f5: /* pdep Gy, By, Ey */
4120                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4121                     || !(s->prefix & PREFIX_VEX)
4122                     || s->vex_l != 0) {
4123                     goto illegal_op;
4124                 }
4125                 ot = mo_64_32(s->dflag);
4126                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4127                 /* Note that by zero-extending the source operand, we
4128                    automatically handle zero-extending the result.  */
4129                 if (ot == MO_64) {
4130                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4131                 } else {
4132                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4133                 }
4134                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4135                 break;
4136 
4137             case 0x2f5: /* pext Gy, By, Ey */
4138                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4139                     || !(s->prefix & PREFIX_VEX)
4140                     || s->vex_l != 0) {
4141                     goto illegal_op;
4142                 }
4143                 ot = mo_64_32(s->dflag);
4144                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4145                 /* Note that by zero-extending the source operand, we
4146                    automatically handle zero-extending the result.  */
4147                 if (ot == MO_64) {
4148                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4149                 } else {
4150                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4151                 }
4152                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4153                 break;
4154 
4155             case 0x1f6: /* adcx Gy, Ey */
4156             case 0x2f6: /* adox Gy, Ey */
4157                 CHECK_NO_VEX(s);
4158                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4159                     goto illegal_op;
4160                 } else {
4161                     TCGv carry_in, carry_out, zero;
4162                     int end_op;
4163 
4164                     ot = mo_64_32(s->dflag);
4165                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4166 
4167                     /* Re-use the carry-out from a previous round.  */
4168                     carry_in = NULL;
4169                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
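                    /*
                     * adcx updates only CF (tracked in cc_dst under
                     * CC_OP_ADCX) and adox only OF (cc_src2 under
                     * CC_OP_ADOX); CC_OP_ADCOX means both are live, so
                     * interleaved adcx/adox chains can each reuse their
                     * own carry without materializing EFLAGS.
                     */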
4170                     switch (s->cc_op) {
4171                     case CC_OP_ADCX:
4172                         if (b == 0x1f6) {
4173                             carry_in = cpu_cc_dst;
4174                             end_op = CC_OP_ADCX;
4175                         } else {
4176                             end_op = CC_OP_ADCOX;
4177                         }
4178                         break;
4179                     case CC_OP_ADOX:
4180                         if (b == 0x1f6) {
4181                             end_op = CC_OP_ADCOX;
4182                         } else {
4183                             carry_in = cpu_cc_src2;
4184                             end_op = CC_OP_ADOX;
4185                         }
4186                         break;
4187                     case CC_OP_ADCOX:
4188                         end_op = CC_OP_ADCOX;
4189                         carry_in = carry_out;
4190                         break;
4191                     default:
4192                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4193                         break;
4194                     }
4195                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4196                     if (!carry_in) {
4197                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4198                             gen_compute_eflags(s);
4199                         }
4200                         carry_in = s->tmp0;
4201                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4202                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4203                     }
4204 
4205                     switch (ot) {
4206 #ifdef TARGET_X86_64
4207                     case MO_32:
4208                         /* If we know TL is 64-bit, and we want a 32-bit
4209                            result, just do everything in 64-bit arithmetic.  */
4210                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4211                         tcg_gen_ext32u_i64(s->T0, s->T0);
4212                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4213                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4214                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4215                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4216                         break;
4217 #endif
4218                     default:
4219                         /* Otherwise compute the carry-out in two steps.  */
4220                         zero = tcg_const_tl(0);
4221                         tcg_gen_add2_tl(s->T0, carry_out,
4222                                         s->T0, zero,
4223                                         carry_in, zero);
4224                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4225                                         cpu_regs[reg], carry_out,
4226                                         s->T0, zero);
4227                         tcg_temp_free(zero);
4228                         break;
4229                     }
4230                     set_cc_op(s, end_op);
4231                 }
4232                 break;
4233 
4234             case 0x1f7: /* shlx Gy, Ey, By */
4235             case 0x2f7: /* sarx Gy, Ey, By */
4236             case 0x3f7: /* shrx Gy, Ey, By */
4237                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4238                     || !(s->prefix & PREFIX_VEX)
4239                     || s->vex_l != 0) {
4240                     goto illegal_op;
4241                 }
4242                 ot = mo_64_32(s->dflag);
4243                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4244                 if (ot == MO_64) {
4245                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4246                 } else {
4247                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4248                 }
4249                 if (b == 0x1f7) {
4250                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4251                 } else if (b == 0x2f7) {
4252                     if (ot != MO_64) {
4253                         tcg_gen_ext32s_tl(s->T0, s->T0);
4254                     }
4255                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4256                 } else {
4257                     if (ot != MO_64) {
4258                         tcg_gen_ext32u_tl(s->T0, s->T0);
4259                     }
4260                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4261                 }
4262                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4263                 break;
4264 
4265             case 0x0f3:
4266             case 0x1f3:
4267             case 0x2f3:
4268             case 0x3f3: /* Group 17 */
4269                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4270                     || !(s->prefix & PREFIX_VEX)
4271                     || s->vex_l != 0) {
4272                     goto illegal_op;
4273                 }
4274                 ot = mo_64_32(s->dflag);
4275                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4276 
4277                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
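                /* BMI1 group 17 bit tricks: x & (x - 1) clears the
                   lowest set bit (blsr), x ^ (x - 1) masks it and
                   everything below (blsmsk), x & -x isolates it
                   (blsi). */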
4278                 switch (reg & 7) {
4279                 case 1: /* blsr By,Ey */
4280                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4281                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4282                     break;
4283                 case 2: /* blsmsk By,Ey */
4284                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4285                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4286                     break;
4287                 case 3: /* blsi By, Ey */
4288                     tcg_gen_neg_tl(s->T1, s->T0);
4289                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4290                     break;
4291                 default:
4292                     goto unknown_op;
4293                 }
4294                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4295                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4296                 set_cc_op(s, CC_OP_BMILGB + ot);
4297                 break;
4298 
4299             default:
4300                 goto unknown_op;
4301             }
4302             break;
4303 
4304         case 0x03a:
4305         case 0x13a:
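            /*
             * Three-byte 0F 3A opcodes: as with 0F 38, the byte read
             * as "modrm" is really the third opcode byte.  Every insn
             * in this block takes a trailing immediate, hence
             * rip_offset = 1 below so that rip-relative addressing
             * accounts for it.
             */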
4306             b = modrm;
4307             modrm = x86_ldub_code(env, s);
4308             rm = modrm & 7;
4309             reg = ((modrm >> 3) & 7) | REX_R(s);
4310             mod = (modrm >> 6) & 3;
4311 
4312             assert(b1 < 2);
4313             op7 = &sse_op_table7[b];
4314             if (op7->ext_mask == 0) {
4315                 goto unknown_op;
4316             }
4317             if (!(s->cpuid_ext_features & op7->ext_mask)) {
4318                 goto illegal_op;
4319             }
4320 
4321             s->rip_offset = 1;
4322 
4323             if (op7->flags & SSE_OPF_SPECIAL) {
4324                 /* None of the "special" ops are valid on mmx registers */
4325                 if (b1 == 0) {
4326                     goto illegal_op;
4327                 }
4328                 ot = mo_64_32(s->dflag);
4329                 rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                }
4332                 reg = ((modrm >> 3) & 7) | REX_R(s);
4333                 val = x86_ldub_code(env, s);
4334                 switch (b) {
4335                 case 0x14: /* pextrb */
4336                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4337                                             xmm_regs[reg].ZMM_B(val & 15)));
4338                     if (mod == 3) {
4339                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4340                     } else {
4341                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4342                                            s->mem_index, MO_UB);
4343                     }
4344                     break;
4345                 case 0x15: /* pextrw */
4346                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4347                                             xmm_regs[reg].ZMM_W(val & 7)));
4348                     if (mod == 3) {
4349                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4350                     } else {
4351                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4352                                            s->mem_index, MO_LEUW);
4353                     }
4354                     break;
4355                 case 0x16:
4356                     if (ot == MO_32) { /* pextrd */
4357                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4358                                         offsetof(CPUX86State,
4359                                                 xmm_regs[reg].ZMM_L(val & 3)));
4360                         if (mod == 3) {
4361                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4362                         } else {
4363                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4364                                                 s->mem_index, MO_LEUL);
4365                         }
4366                     } else { /* pextrq */
4367 #ifdef TARGET_X86_64
4368                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4369                                         offsetof(CPUX86State,
4370                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4371                         if (mod == 3) {
4372                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4373                         } else {
4374                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4375                                                 s->mem_index, MO_LEUQ);
4376                         }
4377 #else
4378                         goto illegal_op;
4379 #endif
4380                     }
4381                     break;
4382                 case 0x17: /* extractps */
4383                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4384                                             xmm_regs[reg].ZMM_L(val & 3)));
4385                     if (mod == 3) {
4386                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4387                     } else {
4388                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4389                                            s->mem_index, MO_LEUL);
4390                     }
4391                     break;
4392                 case 0x20: /* pinsrb */
4393                     if (mod == 3) {
4394                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4395                     } else {
4396                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4397                                            s->mem_index, MO_UB);
4398                     }
4399                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4400                                             xmm_regs[reg].ZMM_B(val & 15)));
4401                     break;
4402                 case 0x21: /* insertps */
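                    /*
                     * insertps immediate: bits 7:6 pick the source lane
                     * (register form only), bits 5:4 the destination
                     * lane, and bits 3:0 a zero mask applied to the
                     * result lanes.
                     */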
4403                     if (mod == 3) {
4404                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4405                                         offsetof(CPUX86State,xmm_regs[rm]
4406                                                 .ZMM_L((val >> 6) & 3)));
4407                     } else {
4408                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4409                                             s->mem_index, MO_LEUL);
4410                     }
4411                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4412                                     offsetof(CPUX86State,xmm_regs[reg]
4413                                             .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(0)));
                    }
                    if ((val >> 1) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(1)));
                    }
                    if ((val >> 2) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(2)));
                    }
                    if ((val >> 3) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(3)));
                    }
4430                     break;
4431                 case 0x22:
4432                     if (ot == MO_32) { /* pinsrd */
4433                         if (mod == 3) {
4434                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4435                         } else {
4436                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4437                                                 s->mem_index, MO_LEUL);
4438                         }
4439                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4440                                         offsetof(CPUX86State,
4441                                                 xmm_regs[reg].ZMM_L(val & 3)));
4442                     } else { /* pinsrq */
4443 #ifdef TARGET_X86_64
4444                         if (mod == 3) {
4445                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4446                         } else {
4447                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4448                                                 s->mem_index, MO_LEUQ);
4449                         }
4450                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4451                                         offsetof(CPUX86State,
4452                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4453 #else
4454                         goto illegal_op;
4455 #endif
4456                     }
4457                     break;
4458                 }
4459                 return;
4460             }
4461 
4462             if (b1 == 0) {
4463                 CHECK_NO_VEX(s);
4464                 /* MMX */
4465                 if ((op7->flags & SSE_OPF_MMX) == 0) {
4466                     goto illegal_op;
4467                 }
4468                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4469                 if (mod == 3) {
4470                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4471                 } else {
4472                     op2_offset = offsetof(CPUX86State,mmx_t0);
4473                     gen_lea_modrm(env, s, modrm);
4474                     gen_ldq_env_A0(s, op2_offset);
4475                 }
4476                 val = x86_ldub_code(env, s);
4477                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4478                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4479 
4480                 /* We only actually have one MMX instruction (palignr) */
4481                 assert(b == 0x0f);
4482 
4483                 op7->fn[0].op1(cpu_env, s->ptr0, s->ptr1,
4484                                tcg_const_i32(val));
4485                 break;
4486             }
4487 
4488             /* SSE */
4489             op1_offset = ZMM_OFFSET(reg);
4490             if (mod == 3) {
4491                 op2_offset = ZMM_OFFSET(rm | REX_B(s));
4492             } else {
4493                 op2_offset = offsetof(CPUX86State, xmm_t0);
4494                 gen_lea_modrm(env, s, modrm);
4495                 gen_ldo_env_A0(s, op2_offset);
4496             }
4497 
4498             val = x86_ldub_code(env, s);
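             /* 0f 3a 60..63: pcmpestrm, pcmpestri, pcmpistrm, pcmpistri;
                all four report their result in EFLAGS. */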
4499             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4500                 set_cc_op(s, CC_OP_EFLAGS);
4501 
4502                 if (s->dflag == MO_64) {
4503                     /* The helper must use the entire 64-bit GP registers */
4504                     val |= 1 << 8;
4505                 }
4506             }
4507 
4508             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4509             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4510             op7->fn[b1].op1(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4511             if (op7->flags & SSE_OPF_CMP) {
4512                 set_cc_op(s, CC_OP_EFLAGS);
4513             }
4514             break;
4515 
4516         case 0x33a:
4517             /* Various integer extensions at 0f 3a f[0-f].  */
4518             b = modrm | (b1 << 8);
4519             modrm = x86_ldub_code(env, s);
4520             reg = ((modrm >> 3) & 7) | REX_R(s);
4521 
4522             switch (b) {
4523             case 0x3f0: /* rorx Gy,Ey, Ib */
4524                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4525                     || !(s->prefix & PREFIX_VEX)
4526                     || s->vex_l != 0) {
4527                     goto illegal_op;
4528                 }
4529                 ot = mo_64_32(s->dflag);
4530                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4531                 b = x86_ldub_code(env, s);
4532                 if (ot == MO_64) {
4533                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4534                 } else {
4535                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4536                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4537                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4538                 }
4539                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4540                 break;
4541 
4542             default:
4543                 goto unknown_op;
4544             }
4545             break;
4546 
4547         default:
4548         unknown_op:
4549             gen_unknown_opcode(env, s);
4550             return;
4551         }
4552     } else {
4553         /* generic MMX or SSE operation */
4554         switch(b) {
4555         case 0x70: /* pshufx insn */
4556         case 0xc6: /* shufps/shufpd */
4557         case 0xc2: /* compare insns */
4558             s->rip_offset = 1;
4559             break;
4560         default:
4561             break;
4562         }
4563         if (is_xmm) {
4564             op1_offset = ZMM_OFFSET(reg);
4565             if (mod != 3) {
4566                 int sz = 4;
4567 
4568                 gen_lea_modrm(env, s, modrm);
4569                 op2_offset = offsetof(CPUX86State, xmm_t0);
4570 
4571                 if (sse_op_flags & SSE_OPF_SCALAR) {
4572                     if (sse_op_flags & SSE_OPF_CMP) {
4573                         /* ucomis[sd], comis[sd] */
4574                         if (b1 == 0) {
4575                             sz = 2;
4576                         } else {
4577                             sz = 3;
4578                         }
4579                     } else {
4580                         /* Most sse scalar operations.  */
4581                         if (b1 == 2) {
4582                             sz = 2;
4583                         } else if (b1 == 3) {
4584                             sz = 3;
4585                         }
4586                     }
4587                 }
4588 
4589                 switch (sz) {
4590                 case 2:
4591                     /* 32 bit access */
4592                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4593                     tcg_gen_st32_tl(s->T0, cpu_env,
4594                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
4595                     break;
4596                 case 3:
4597                     /* 64 bit access */
4598                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4599                     break;
4600                 default:
4601                     /* 128 bit access */
4602                     gen_ldo_env_A0(s, op2_offset);
4603                     break;
4604                 }
4605             } else {
4606                 rm = (modrm & 7) | REX_B(s);
4607                 op2_offset = ZMM_OFFSET(rm);
4608             }
4609         } else {
4610             CHECK_NO_VEX(s);
4611             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4612             if (mod != 3) {
4613                 gen_lea_modrm(env, s, modrm);
4614                 op2_offset = offsetof(CPUX86State,mmx_t0);
4615                 gen_ldq_env_A0(s, op2_offset);
4616             } else {
4617                 rm = (modrm & 7);
4618                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4619             }
4620             if (sse_op_flags & SSE_OPF_3DNOW) {
4621                 /* 3DNow! data insns */
4622                 val = x86_ldub_code(env, s);
4623                 SSEFunc_0_epp op_3dnow = sse_op_table5[val];
4624                 if (!op_3dnow) {
4625                     goto unknown_op;
4626                 }
4627                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4628                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4629                 op_3dnow(cpu_env, s->ptr0, s->ptr1);
4630                 return;
4631             }
4632         }
4633         tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4634         tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4635         if (sse_op_flags & SSE_OPF_SHUF) {
4636             val = x86_ldub_code(env, s);
4637             sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val));
4638         } else if (b == 0xf7) {
4639             /* maskmov{q,dqu}: stores to DS:[rDI], so prepare A0 by hand */
4640             if (mod != 3) {
4641                 goto illegal_op;
4642             }
4643             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4644             gen_extu(s->aflag, s->A0);
4645             gen_add_A0_ds_seg(s);
4646             sse_op_fn.op1t(cpu_env, s->ptr0, s->ptr1, s->A0);
4647         } else if (b == 0xc2) {
4648             /* compare insns, bits 7:3 (7:5 for AVX) are ignored */
4649             val = x86_ldub_code(env, s) & 7;
4650             sse_op_table4[val][b1](cpu_env, s->ptr0, s->ptr1);
4651         } else {
4652             sse_op_fn.op1(cpu_env, s->ptr0, s->ptr1);
4653         }
4654 
4655         if (sse_op_flags & SSE_OPF_CMP) {
4656             set_cc_op(s, CC_OP_EFLAGS);
4657         }
4658     }
4659 }
4660 
4661 /* Convert one instruction. s->base.is_jmp is set if the translation must
4662    be stopped. Returns the next pc value. */
4663 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4664 {
4665     CPUX86State *env = cpu->env_ptr;
4666     int b, prefixes;
4667     int shift;
4668     MemOp ot, aflag, dflag;
4669     int modrm, reg, rm, mod, op, opreg, val;
4670     target_ulong next_eip, tval;
4671     target_ulong pc_start = s->base.pc_next;
4672 
4673     s->pc_start = s->pc = pc_start;
4674     s->override = -1;
4675 #ifdef TARGET_X86_64
4676     s->rex_w = false;
4677     s->rex_r = 0;
4678     s->rex_x = 0;
4679     s->rex_b = 0;
4680 #endif
4681     s->rip_offset = 0; /* for relative ip address */
4682     s->vex_l = 0;
4683     s->vex_v = 0;
4684     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4685         gen_exception_gpf(s);
4686         return s->pc;
4687     }
4688 
4689     prefixes = 0;
4690 
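         /* Each prefix byte records its effect and loops back here for the
            next byte; a later segment override simply overwrites any
            earlier one. */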
4691  next_byte:
4692     b = x86_ldub_code(env, s);
4693     /* Collect prefixes.  */
4694     switch (b) {
4695     case 0xf3:
4696         prefixes |= PREFIX_REPZ;
4697         goto next_byte;
4698     case 0xf2:
4699         prefixes |= PREFIX_REPNZ;
4700         goto next_byte;
4701     case 0xf0:
4702         prefixes |= PREFIX_LOCK;
4703         goto next_byte;
4704     case 0x2e:
4705         s->override = R_CS;
4706         goto next_byte;
4707     case 0x36:
4708         s->override = R_SS;
4709         goto next_byte;
4710     case 0x3e:
4711         s->override = R_DS;
4712         goto next_byte;
4713     case 0x26:
4714         s->override = R_ES;
4715         goto next_byte;
4716     case 0x64:
4717         s->override = R_FS;
4718         goto next_byte;
4719     case 0x65:
4720         s->override = R_GS;
4721         goto next_byte;
4722     case 0x66:
4723         prefixes |= PREFIX_DATA;
4724         goto next_byte;
4725     case 0x67:
4726         prefixes |= PREFIX_ADR;
4727         goto next_byte;
4728 #ifdef TARGET_X86_64
4729     case 0x40 ... 0x4f:
4730         if (CODE64(s)) {
4731             /* REX prefix */
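             /* Each REX payload bit is stored pre-shifted to bit 3, so it
                can be ORed directly onto a 3-bit ModRM reg/index/base
                field. */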
4732             prefixes |= PREFIX_REX;
4733             s->rex_w = (b >> 3) & 1;
4734             s->rex_r = (b & 0x4) << 1;
4735             s->rex_x = (b & 0x2) << 2;
4736             s->rex_b = (b & 0x1) << 3;
4737             goto next_byte;
4738         }
4739         break;
4740 #endif
4741     case 0xc5: /* 2-byte VEX */
4742     case 0xc4: /* 3-byte VEX */
4743         /* VEX prefixes are only valid in 32-bit and 64-bit mode and never
4744            in vm86 mode; otherwise these bytes decode as LES or LDS.  */
4745         if (CODE32(s) && !VM86(s)) {
4746             static const int pp_prefix[4] = {
4747                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4748             };
4749             int vex3, vex2 = x86_ldub_code(env, s);
4750 
4751             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4752                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4753                    otherwise the instruction is LES or LDS.  */
4754                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4755                 break;
4756             }
4757 
4758             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4759             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4760                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4761                 goto illegal_op;
4762             }
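             /* VEX stores R, X, B and vvvv inverted, hence the bitwise NOT
                before each of those fields is extracted below. */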
4763 #ifdef TARGET_X86_64
4764             s->rex_r = (~vex2 >> 4) & 8;
4765 #endif
4766             if (b == 0xc5) {
4767                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4768                 vex3 = vex2;
4769                 b = x86_ldub_code(env, s) | 0x100;
4770             } else {
4771                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4772                 vex3 = x86_ldub_code(env, s);
4773 #ifdef TARGET_X86_64
4774                 s->rex_x = (~vex2 >> 3) & 8;
4775                 s->rex_b = (~vex2 >> 2) & 8;
4776                 s->rex_w = (vex3 >> 7) & 1;
4777 #endif
4778                 switch (vex2 & 0x1f) {
4779                 case 0x01: /* Implied 0f leading opcode bytes.  */
4780                     b = x86_ldub_code(env, s) | 0x100;
4781                     break;
4782                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4783                     b = 0x138;
4784                     break;
4785                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4786                     b = 0x13a;
4787                     break;
4788                 default:   /* Reserved for future use.  */
4789                     goto unknown_op;
4790                 }
4791             }
4792             s->vex_v = (~vex3 >> 3) & 0xf;
4793             s->vex_l = (vex3 >> 2) & 1;
4794             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4795         }
4796         break;
4797     }
4798 
4799     /* Post-process prefixes.  */
4800     if (CODE64(s)) {
4801         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4802            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4803            over 0x66 if both are present.  */
4804         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4805         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4806         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4807     } else {
4808         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4809         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4810             dflag = MO_32;
4811         } else {
4812             dflag = MO_16;
4813         }
4814         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4815         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4816             aflag = MO_32;
4817         }  else {
4818             aflag = MO_16;
4819         }
4820     }
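         /* Example: in 32-bit code a 66 prefix selects MO_16 data while
            addressing stays MO_32; adding a 67 prefix flips addressing to
            MO_16 as well. */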
4821 
4822     s->prefix = prefixes;
4823     s->aflag = aflag;
4824     s->dflag = dflag;
4825 
4826     /* now check op code */
4827  reswitch:
4828     switch(b) {
4829     case 0x0f:
4830         /**************************/
4831         /* extended op code */
4832         b = x86_ldub_code(env, s) | 0x100;
4833         goto reswitch;
4834 
4835         /**************************/
4836         /* arith & logic */
4837     case 0x00 ... 0x05:
4838     case 0x08 ... 0x0d:
4839     case 0x10 ... 0x15:
4840     case 0x18 ... 0x1d:
4841     case 0x20 ... 0x25:
4842     case 0x28 ... 0x2d:
4843     case 0x30 ... 0x35:
4844     case 0x38 ... 0x3d:
4845         {
4846             int op, f, val;
4847             op = (b >> 3) & 7;
4848             f = (b >> 1) & 3;
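             /* Bits 5:3 of the opcode select the ALU operation; bits 2:1
                select the form decoded below: 0 = OP Ev,Gv, 1 = OP Gv,Ev,
                2 = OP AL/eAX,Iv. */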
4849 
4850             ot = mo_b_d(b, dflag);
4851 
4852             switch(f) {
4853             case 0: /* OP Ev, Gv */
4854                 modrm = x86_ldub_code(env, s);
4855                 reg = ((modrm >> 3) & 7) | REX_R(s);
4856                 mod = (modrm >> 6) & 3;
4857                 rm = (modrm & 7) | REX_B(s);
4858                 if (mod != 3) {
4859                     gen_lea_modrm(env, s, modrm);
4860                     opreg = OR_TMP0;
4861                 } else if (op == OP_XORL && rm == reg) {
4862                 xor_zero:
4863                     /* xor reg, reg optimisation */
4864                     set_cc_op(s, CC_OP_CLR);
4865                     tcg_gen_movi_tl(s->T0, 0);
4866                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4867                     break;
4868                 } else {
4869                     opreg = rm;
4870                 }
4871                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4872                 gen_op(s, op, ot, opreg);
4873                 break;
4874             case 1: /* OP Gv, Ev */
4875                 modrm = x86_ldub_code(env, s);
4876                 mod = (modrm >> 6) & 3;
4877                 reg = ((modrm >> 3) & 7) | REX_R(s);
4878                 rm = (modrm & 7) | REX_B(s);
4879                 if (mod != 3) {
4880                     gen_lea_modrm(env, s, modrm);
4881                     gen_op_ld_v(s, ot, s->T1, s->A0);
4882                 } else if (op == OP_XORL && rm == reg) {
4883                     goto xor_zero;
4884                 } else {
4885                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4886                 }
4887                 gen_op(s, op, ot, reg);
4888                 break;
4889             case 2: /* OP A, Iv */
4890                 val = insn_get(env, s, ot);
4891                 tcg_gen_movi_tl(s->T1, val);
4892                 gen_op(s, op, ot, OR_EAX);
4893                 break;
4894             }
4895         }
4896         break;
4897 
4898     case 0x82:
4899         if (CODE64(s))
4900             goto illegal_op;
4901         /* fall through */
4902     case 0x80: /* GRP1 */
4903     case 0x81:
4904     case 0x83:
4905         {
4906             int val;
4907 
4908             ot = mo_b_d(b, dflag);
4909 
4910             modrm = x86_ldub_code(env, s);
4911             mod = (modrm >> 6) & 3;
4912             rm = (modrm & 7) | REX_B(s);
4913             op = (modrm >> 3) & 7;
4914 
4915             if (mod != 3) {
4916                 if (b == 0x83)
4917                     s->rip_offset = 1;
4918                 else
4919                     s->rip_offset = insn_const_size(ot);
4920                 gen_lea_modrm(env, s, modrm);
4921                 opreg = OR_TMP0;
4922             } else {
4923                 opreg = rm;
4924             }
4925 
4926             switch(b) {
4927             default:
4928             case 0x80:
4929             case 0x81:
4930             case 0x82:
4931                 val = insn_get(env, s, ot);
4932                 break;
4933             case 0x83:
4934                 val = (int8_t)insn_get(env, s, MO_8);
4935                 break;
4936             }
4937             tcg_gen_movi_tl(s->T1, val);
4938             gen_op(s, op, ot, opreg);
4939         }
4940         break;
4941 
4942         /**************************/
4943         /* inc, dec, and other misc arith */
4944     case 0x40 ... 0x47: /* inc Gv */
4945         ot = dflag;
4946         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4947         break;
4948     case 0x48 ... 0x4f: /* dec Gv */
4949         ot = dflag;
4950         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4951         break;
4952     case 0xf6: /* GRP3 */
4953     case 0xf7:
4954         ot = mo_b_d(b, dflag);
4955 
4956         modrm = x86_ldub_code(env, s);
4957         mod = (modrm >> 6) & 3;
4958         rm = (modrm & 7) | REX_B(s);
4959         op = (modrm >> 3) & 7;
4960         if (mod != 3) {
4961             if (op == 0) {
4962                 s->rip_offset = insn_const_size(ot);
4963             }
4964             gen_lea_modrm(env, s, modrm);
4965             /* For the cases below that handle locked memory themselves, don't load here.  */
4966             if (!(s->prefix & PREFIX_LOCK)
4967                 || op != 2) {
4968                 gen_op_ld_v(s, ot, s->T0, s->A0);
4969             }
4970         } else {
4971             gen_op_mov_v_reg(s, ot, s->T0, rm);
4972         }
4973 
4974         switch(op) {
4975         case 0: /* test */
4976             val = insn_get(env, s, ot);
4977             tcg_gen_movi_tl(s->T1, val);
4978             gen_op_testl_T0_T1_cc(s);
4979             set_cc_op(s, CC_OP_LOGICB + ot);
4980             break;
4981         case 2: /* not */
4982             if (s->prefix & PREFIX_LOCK) {
4983                 if (mod == 3) {
4984                     goto illegal_op;
4985                 }
4986                 tcg_gen_movi_tl(s->T0, ~0);
4987                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4988                                             s->mem_index, ot | MO_LE);
4989             } else {
4990                 tcg_gen_not_tl(s->T0, s->T0);
4991                 if (mod != 3) {
4992                     gen_op_st_v(s, ot, s->T0, s->A0);
4993                 } else {
4994                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4995                 }
4996             }
4997             break;
4998         case 3: /* neg */
4999             if (s->prefix & PREFIX_LOCK) {
5000                 TCGLabel *label1;
5001                 TCGv a0, t0, t1, t2;
5002 
5003                 if (mod == 3) {
5004                     goto illegal_op;
5005                 }
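                     /* No atomic negate op exists, so emit a cmpxchg retry
                        loop: try to replace the expected value t0 with -t0
                        and retry if memory changed underneath us.  Local
                        temps are needed because the values must survive the
                        branch back to label1. */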
5006                 a0 = tcg_temp_local_new();
5007                 t0 = tcg_temp_local_new();
5008                 label1 = gen_new_label();
5009 
5010                 tcg_gen_mov_tl(a0, s->A0);
5011                 tcg_gen_mov_tl(t0, s->T0);
5012 
5013                 gen_set_label(label1);
5014                 t1 = tcg_temp_new();
5015                 t2 = tcg_temp_new();
5016                 tcg_gen_mov_tl(t2, t0);
5017                 tcg_gen_neg_tl(t1, t0);
5018                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
5019                                           s->mem_index, ot | MO_LE);
5020                 tcg_temp_free(t1);
5021                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
5022 
5023                 tcg_temp_free(t2);
5024                 tcg_temp_free(a0);
5025                 tcg_gen_mov_tl(s->T0, t0);
5026                 tcg_temp_free(t0);
5027             } else {
5028                 tcg_gen_neg_tl(s->T0, s->T0);
5029                 if (mod != 3) {
5030                     gen_op_st_v(s, ot, s->T0, s->A0);
5031                 } else {
5032                     gen_op_mov_reg_v(s, ot, rm, s->T0);
5033                 }
5034             }
5035             gen_op_update_neg_cc(s);
5036             set_cc_op(s, CC_OP_SUBB + ot);
5037             break;
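                 /* For mul/imul the flags are computed lazily: cc_dst takes
                    the low part of the product and cc_src the high part
                    (mul), or the difference between the high part and the
                    sign extension of the low part (imul); CC_OP_MUL* then
                    derives CF/OF from cc_src. */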
5038         case 4: /* mul */
5039             switch(ot) {
5040             case MO_8:
5041                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5042                 tcg_gen_ext8u_tl(s->T0, s->T0);
5043                 tcg_gen_ext8u_tl(s->T1, s->T1);
5044                 /* XXX: use 32 bit mul which could be faster */
5045                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5046                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5047                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5048                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
5049                 set_cc_op(s, CC_OP_MULB);
5050                 break;
5051             case MO_16:
5052                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5053                 tcg_gen_ext16u_tl(s->T0, s->T0);
5054                 tcg_gen_ext16u_tl(s->T1, s->T1);
5055                 /* XXX: use 32 bit mul which could be faster */
5056                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5057                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5058                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5059                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5060                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5061                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
5062                 set_cc_op(s, CC_OP_MULW);
5063                 break;
5064             default:
5065             case MO_32:
5066                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5067                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5068                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
5069                                   s->tmp2_i32, s->tmp3_i32);
5070                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5071                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5072                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5073                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
5074                 set_cc_op(s, CC_OP_MULL);
5075                 break;
5076 #ifdef TARGET_X86_64
5077             case MO_64:
5078                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5079                                   s->T0, cpu_regs[R_EAX]);
5080                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5081                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
5082                 set_cc_op(s, CC_OP_MULQ);
5083                 break;
5084 #endif
5085             }
5086             break;
5087         case 5: /* imul */
5088             switch(ot) {
5089             case MO_8:
5090                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5091                 tcg_gen_ext8s_tl(s->T0, s->T0);
5092                 tcg_gen_ext8s_tl(s->T1, s->T1);
5093                 /* XXX: use 32 bit mul which could be faster */
5094                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5095                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5096                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5097                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5098                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5099                 set_cc_op(s, CC_OP_MULB);
5100                 break;
5101             case MO_16:
5102                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5103                 tcg_gen_ext16s_tl(s->T0, s->T0);
5104                 tcg_gen_ext16s_tl(s->T1, s->T1);
5105                 /* XXX: use 32 bit mul which could be faster */
5106                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5107                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5108                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5109                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5110                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5111                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5112                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5113                 set_cc_op(s, CC_OP_MULW);
5114                 break;
5115             default:
5116             case MO_32:
5117                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5118                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5119                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5120                                   s->tmp2_i32, s->tmp3_i32);
5121                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5122                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5123                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5124                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5125                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5126                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5127                 set_cc_op(s, CC_OP_MULL);
5128                 break;
5129 #ifdef TARGET_X86_64
5130             case MO_64:
5131                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5132                                   s->T0, cpu_regs[R_EAX]);
5133                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5134                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5135                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5136                 set_cc_op(s, CC_OP_MULQ);
5137                 break;
5138 #endif
5139             }
5140             break;
5141         case 6: /* div */
5142             switch(ot) {
5143             case MO_8:
5144                 gen_helper_divb_AL(cpu_env, s->T0);
5145                 break;
5146             case MO_16:
5147                 gen_helper_divw_AX(cpu_env, s->T0);
5148                 break;
5149             default:
5150             case MO_32:
5151                 gen_helper_divl_EAX(cpu_env, s->T0);
5152                 break;
5153 #ifdef TARGET_X86_64
5154             case MO_64:
5155                 gen_helper_divq_EAX(cpu_env, s->T0);
5156                 break;
5157 #endif
5158             }
5159             break;
5160         case 7: /* idiv */
5161             switch(ot) {
5162             case MO_8:
5163                 gen_helper_idivb_AL(cpu_env, s->T0);
5164                 break;
5165             case MO_16:
5166                 gen_helper_idivw_AX(cpu_env, s->T0);
5167                 break;
5168             default:
5169             case MO_32:
5170                 gen_helper_idivl_EAX(cpu_env, s->T0);
5171                 break;
5172 #ifdef TARGET_X86_64
5173             case MO_64:
5174                 gen_helper_idivq_EAX(cpu_env, s->T0);
5175                 break;
5176 #endif
5177             }
5178             break;
5179         default:
5180             goto unknown_op;
5181         }
5182         break;
5183 
5184     case 0xfe: /* GRP4 */
5185     case 0xff: /* GRP5 */
5186         ot = mo_b_d(b, dflag);
5187 
5188         modrm = x86_ldub_code(env, s);
5189         mod = (modrm >> 6) & 3;
5190         rm = (modrm & 7) | REX_B(s);
5191         op = (modrm >> 3) & 7;
5192         if (op >= 2 && b == 0xfe) {
5193             goto unknown_op;
5194         }
5195         if (CODE64(s)) {
5196             if (op == 2 || op == 4) {
5197                 /* operand size for jumps is 64 bit */
5198                 ot = MO_64;
5199             } else if (op == 3 || op == 5) {
5200                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5201             } else if (op == 6) {
5202                 /* default push size is 64 bit */
5203                 ot = mo_pushpop(s, dflag);
5204             }
5205         }
5206         if (mod != 3) {
5207             gen_lea_modrm(env, s, modrm);
5208             if (op >= 2 && op != 3 && op != 5)
5209                 gen_op_ld_v(s, ot, s->T0, s->A0);
5210         } else {
5211             gen_op_mov_v_reg(s, ot, s->T0, rm);
5212         }
5213 
5214         switch(op) {
5215         case 0: /* inc Ev */
5216             if (mod != 3)
5217                 opreg = OR_TMP0;
5218             else
5219                 opreg = rm;
5220             gen_inc(s, ot, opreg, 1);
5221             break;
5222         case 1: /* dec Ev */
5223             if (mod != 3)
5224                 opreg = OR_TMP0;
5225             else
5226                 opreg = rm;
5227             gen_inc(s, ot, opreg, -1);
5228             break;
5229         case 2: /* call Ev */
5230             /* XXX: optimize the memory case (the 'and' is unnecessary) */
5231             if (dflag == MO_16) {
5232                 tcg_gen_ext16u_tl(s->T0, s->T0);
5233             }
5234             next_eip = s->pc - s->cs_base;
5235             tcg_gen_movi_tl(s->T1, next_eip);
5236             gen_push_v(s, s->T1);
5237             gen_op_jmp_v(s->T0);
5238             gen_bnd_jmp(s);
5239             gen_jr(s, s->T0);
5240             break;
5241         case 3: /* lcall Ev */
5242             if (mod == 3) {
5243                 goto illegal_op;
5244             }
5245             gen_op_ld_v(s, ot, s->T1, s->A0);
5246             gen_add_A0_im(s, 1 << ot);
5247             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5248         do_lcall:
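             /* Protected-mode far calls must go through the helper, which
                performs all segment and privilege checks; real and vm86
                mode use the simpler lcall_real helper. */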
5249             if (PE(s) && !VM86(s)) {
5250                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5251                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5252                                            tcg_const_i32(dflag - 1),
5253                                            tcg_const_tl(s->pc - s->cs_base));
5254             } else {
5255                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5256                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5257                                       tcg_const_i32(dflag - 1),
5258                                       tcg_const_i32(s->pc - s->cs_base));
5259             }
5260             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5261             gen_jr(s, s->tmp4);
5262             break;
5263         case 4: /* jmp Ev */
5264             if (dflag == MO_16) {
5265                 tcg_gen_ext16u_tl(s->T0, s->T0);
5266             }
5267             gen_op_jmp_v(s->T0);
5268             gen_bnd_jmp(s);
5269             gen_jr(s, s->T0);
5270             break;
5271         case 5: /* ljmp Ev */
5272             if (mod == 3) {
5273                 goto illegal_op;
5274             }
5275             gen_op_ld_v(s, ot, s->T1, s->A0);
5276             gen_add_A0_im(s, 1 << ot);
5277             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5278         do_ljmp:
5279             if (PE(s) && !VM86(s)) {
5280                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5281                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5282                                           tcg_const_tl(s->pc - s->cs_base));
5283             } else {
5284                 gen_op_movl_seg_T0_vm(s, R_CS);
5285                 gen_op_jmp_v(s->T1);
5286             }
5287             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5288             gen_jr(s, s->tmp4);
5289             break;
5290         case 6: /* push Ev */
5291             gen_push_v(s, s->T0);
5292             break;
5293         default:
5294             goto unknown_op;
5295         }
5296         break;
5297 
5298     case 0x84: /* test Ev, Gv */
5299     case 0x85:
5300         ot = mo_b_d(b, dflag);
5301 
5302         modrm = x86_ldub_code(env, s);
5303         reg = ((modrm >> 3) & 7) | REX_R(s);
5304 
5305         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5306         gen_op_mov_v_reg(s, ot, s->T1, reg);
5307         gen_op_testl_T0_T1_cc(s);
5308         set_cc_op(s, CC_OP_LOGICB + ot);
5309         break;
5310 
5311     case 0xa8: /* test eAX, Iv */
5312     case 0xa9:
5313         ot = mo_b_d(b, dflag);
5314         val = insn_get(env, s, ot);
5315 
5316         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5317         tcg_gen_movi_tl(s->T1, val);
5318         gen_op_testl_T0_T1_cc(s);
5319         set_cc_op(s, CC_OP_LOGICB + ot);
5320         break;
5321 
5322     case 0x98: /* CWDE/CBW */
5323         switch (dflag) {
5324 #ifdef TARGET_X86_64
5325         case MO_64:
5326             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5327             tcg_gen_ext32s_tl(s->T0, s->T0);
5328             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5329             break;
5330 #endif
5331         case MO_32:
5332             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5333             tcg_gen_ext16s_tl(s->T0, s->T0);
5334             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5335             break;
5336         case MO_16:
5337             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5338             tcg_gen_ext8s_tl(s->T0, s->T0);
5339             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5340             break;
5341         default:
5342             tcg_abort();
5343         }
5344         break;
5345     case 0x99: /* CDQ/CWD */
5346         switch (dflag) {
5347 #ifdef TARGET_X86_64
5348         case MO_64:
5349             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5350             tcg_gen_sari_tl(s->T0, s->T0, 63);
5351             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5352             break;
5353 #endif
5354         case MO_32:
5355             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5356             tcg_gen_ext32s_tl(s->T0, s->T0);
5357             tcg_gen_sari_tl(s->T0, s->T0, 31);
5358             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5359             break;
5360         case MO_16:
5361             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5362             tcg_gen_ext16s_tl(s->T0, s->T0);
5363             tcg_gen_sari_tl(s->T0, s->T0, 15);
5364             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5365             break;
5366         default:
5367             tcg_abort();
5368         }
5369         break;
5370     case 0x1af: /* imul Gv, Ev */
5371     case 0x69: /* imul Gv, Ev, I */
5372     case 0x6b:
5373         ot = dflag;
5374         modrm = x86_ldub_code(env, s);
5375         reg = ((modrm >> 3) & 7) | REX_R(s);
5376         if (b == 0x69)
5377             s->rip_offset = insn_const_size(ot);
5378         else if (b == 0x6b)
5379             s->rip_offset = 1;
5380         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5381         if (b == 0x69) {
5382             val = insn_get(env, s, ot);
5383             tcg_gen_movi_tl(s->T1, val);
5384         } else if (b == 0x6b) {
5385             val = (int8_t)insn_get(env, s, MO_8);
5386             tcg_gen_movi_tl(s->T1, val);
5387         } else {
5388             gen_op_mov_v_reg(s, ot, s->T1, reg);
5389         }
5390         switch (ot) {
5391 #ifdef TARGET_X86_64
5392         case MO_64:
5393             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5394             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5395             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5396             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5397             break;
5398 #endif
5399         case MO_32:
5400             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5401             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5402             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5403                               s->tmp2_i32, s->tmp3_i32);
5404             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5405             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5406             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5407             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5408             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5409             break;
5410         default:
5411             tcg_gen_ext16s_tl(s->T0, s->T0);
5412             tcg_gen_ext16s_tl(s->T1, s->T1);
5413             /* XXX: use 32 bit mul which could be faster */
5414             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5415             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5416             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5417             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5418             gen_op_mov_reg_v(s, ot, reg, s->T0);
5419             break;
5420         }
5421         set_cc_op(s, CC_OP_MULB + ot);
5422         break;
5423     case 0x1c0:
5424     case 0x1c1: /* xadd Ev, Gv */
5425         ot = mo_b_d(b, dflag);
5426         modrm = x86_ldub_code(env, s);
5427         reg = ((modrm >> 3) & 7) | REX_R(s);
5428         mod = (modrm >> 6) & 3;
5429         gen_op_mov_v_reg(s, ot, s->T0, reg);
5430         if (mod == 3) {
5431             rm = (modrm & 7) | REX_B(s);
5432             gen_op_mov_v_reg(s, ot, s->T1, rm);
5433             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5434             gen_op_mov_reg_v(s, ot, reg, s->T1);
5435             gen_op_mov_reg_v(s, ot, rm, s->T0);
5436         } else {
5437             gen_lea_modrm(env, s, modrm);
5438             if (s->prefix & PREFIX_LOCK) {
5439                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5440                                             s->mem_index, ot | MO_LE);
5441                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5442             } else {
5443                 gen_op_ld_v(s, ot, s->T1, s->A0);
5444                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5445                 gen_op_st_v(s, ot, s->T0, s->A0);
5446             }
5447             gen_op_mov_reg_v(s, ot, reg, s->T1);
5448         }
5449         gen_op_update2_cc(s);
5450         set_cc_op(s, CC_OP_ADDB + ot);
5451         break;
5452     case 0x1b0:
5453     case 0x1b1: /* cmpxchg Ev, Gv */
5454         {
5455             TCGv oldv, newv, cmpv;
5456 
5457             ot = mo_b_d(b, dflag);
5458             modrm = x86_ldub_code(env, s);
5459             reg = ((modrm >> 3) & 7) | REX_R(s);
5460             mod = (modrm >> 6) & 3;
5461             oldv = tcg_temp_new();
5462             newv = tcg_temp_new();
5463             cmpv = tcg_temp_new();
5464             gen_op_mov_v_reg(s, ot, newv, reg);
5465             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5466 
5467             if (s->prefix & PREFIX_LOCK) {
5468                 if (mod == 3) {
5469                     goto illegal_op;
5470                 }
5471                 gen_lea_modrm(env, s, modrm);
5472                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5473                                           s->mem_index, ot | MO_LE);
5474                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5475             } else {
5476                 if (mod == 3) {
5477                     rm = (modrm & 7) | REX_B(s);
5478                     gen_op_mov_v_reg(s, ot, oldv, rm);
5479                 } else {
5480                     gen_lea_modrm(env, s, modrm);
5481                     gen_op_ld_v(s, ot, oldv, s->A0);
5482                     rm = 0; /* avoid warning */
5483                 }
5484                 gen_extu(ot, oldv);
5485                 gen_extu(ot, cmpv);
5486                 /* store value = (old == cmp ? new : old);  */
5487                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5488                 if (mod == 3) {
5489                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5490                     gen_op_mov_reg_v(s, ot, rm, newv);
5491                 } else {
5492                     /* Perform an unconditional store cycle like a physical
5493                        CPU; the store must come before the accumulator update
5494                        so the instruction stays idempotent if the store
5495                        faults and the instruction is restarted */
5496                     gen_op_st_v(s, ot, newv, s->A0);
5497                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5498                 }
5499             }
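             /* CMPXCHG sets the flags exactly as CMP accumulator,dest
                would, so hand the lazy flags machinery the same operands. */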
5500             tcg_gen_mov_tl(cpu_cc_src, oldv);
5501             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5502             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5503             set_cc_op(s, CC_OP_SUBB + ot);
5504             tcg_temp_free(oldv);
5505             tcg_temp_free(newv);
5506             tcg_temp_free(cmpv);
5507         }
5508         break;
5509     case 0x1c7: /* cmpxchg8b */
5510         modrm = x86_ldub_code(env, s);
5511         mod = (modrm >> 6) & 3;
5512         switch ((modrm >> 3) & 7) {
5513         case 1: /* CMPXCHG8, CMPXCHG16 */
5514             if (mod == 3) {
5515                 goto illegal_op;
5516             }
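             /* A truly atomic helper is needed only for a LOCK'd access in
                a TB compiled for parallel execution; otherwise the unlocked
                variant is equivalent and cheaper. */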
5517 #ifdef TARGET_X86_64
5518             if (dflag == MO_64) {
5519                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5520                     goto illegal_op;
5521                 }
5522                 gen_lea_modrm(env, s, modrm);
5523                 if ((s->prefix & PREFIX_LOCK) &&
5524                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5525                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5526                 } else {
5527                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5528                 }
5529                 set_cc_op(s, CC_OP_EFLAGS);
5530                 break;
5531             }
5532 #endif
5533             if (!(s->cpuid_features & CPUID_CX8)) {
5534                 goto illegal_op;
5535             }
5536             gen_lea_modrm(env, s, modrm);
5537             if ((s->prefix & PREFIX_LOCK) &&
5538                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5539                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5540             } else {
5541                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5542             }
5543             set_cc_op(s, CC_OP_EFLAGS);
5544             break;
5545 
5546         case 7: /* RDSEED */
5547         case 6: /* RDRAND */
5548             if (mod != 3 ||
5549                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5550                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5551                 goto illegal_op;
5552             }
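             /* RDRAND/RDSEED are nondeterministic, so treat them like I/O
                under icount: open an I/O window first and end the TB
                afterwards so record/replay stays consistent. */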
5553             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5554                 gen_io_start();
5555             }
5556             gen_helper_rdrand(s->T0, cpu_env);
5557             rm = (modrm & 7) | REX_B(s);
5558             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5559             set_cc_op(s, CC_OP_EFLAGS);
5560             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5561                 gen_jmp(s, s->pc - s->cs_base);
5562             }
5563             break;
5564 
5565         default:
5566             goto illegal_op;
5567         }
5568         break;
5569 
5570         /**************************/
5571         /* push/pop */
5572     case 0x50 ... 0x57: /* push */
5573         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5574         gen_push_v(s, s->T0);
5575         break;
5576     case 0x58 ... 0x5f: /* pop */
5577         ot = gen_pop_T0(s);
5578         /* NOTE: order is important for pop %sp */
5579         gen_pop_update(s, ot);
5580         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5581         break;
5582     case 0x60: /* pusha */
5583         if (CODE64(s))
5584             goto illegal_op;
5585         gen_pusha(s);
5586         break;
5587     case 0x61: /* popa */
5588         if (CODE64(s))
5589             goto illegal_op;
5590         gen_popa(s);
5591         break;
5592     case 0x68: /* push Iv */
5593     case 0x6a:
5594         ot = mo_pushpop(s, dflag);
5595         if (b == 0x68)
5596             val = insn_get(env, s, ot);
5597         else
5598             val = (int8_t)insn_get(env, s, MO_8);
5599         tcg_gen_movi_tl(s->T0, val);
5600         gen_push_v(s, s->T0);
5601         break;
5602     case 0x8f: /* pop Ev */
5603         modrm = x86_ldub_code(env, s);
5604         mod = (modrm >> 6) & 3;
5605         ot = gen_pop_T0(s);
5606         if (mod == 3) {
5607             /* NOTE: order is important for pop %sp */
5608             gen_pop_update(s, ot);
5609             rm = (modrm & 7) | REX_B(s);
5610             gen_op_mov_reg_v(s, ot, rm, s->T0);
5611         } else {
5612             /* NOTE: order also matters for MMU exceptions */
5613             s->popl_esp_hack = 1 << ot;
5614             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5615             s->popl_esp_hack = 0;
5616             gen_pop_update(s, ot);
5617         }
5618         break;
5619     case 0xc8: /* enter */
5620         {
5621             int level;
5622             val = x86_lduw_code(env, s);
5623             level = x86_ldub_code(env, s);
5624             gen_enter(s, val, level);
5625         }
5626         break;
5627     case 0xc9: /* leave */
5628         gen_leave(s);
5629         break;
5630     case 0x06: /* push es */
5631     case 0x0e: /* push cs */
5632     case 0x16: /* push ss */
5633     case 0x1e: /* push ds */
5634         if (CODE64(s))
5635             goto illegal_op;
5636         gen_op_movl_T0_seg(s, b >> 3);
5637         gen_push_v(s, s->T0);
5638         break;
5639     case 0x1a0: /* push fs */
5640     case 0x1a8: /* push gs */
5641         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5642         gen_push_v(s, s->T0);
5643         break;
5644     case 0x07: /* pop es */
5645     case 0x17: /* pop ss */
5646     case 0x1f: /* pop ds */
5647         if (CODE64(s))
5648             goto illegal_op;
5649         reg = b >> 3;
5650         ot = gen_pop_T0(s);
5651         gen_movl_seg_T0(s, reg);
5652         gen_pop_update(s, ot);
5653         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5654         if (s->base.is_jmp) {
5655             gen_jmp_im(s, s->pc - s->cs_base);
5656             if (reg == R_SS) {
5657                 s->flags &= ~HF_TF_MASK;
5658                 gen_eob_inhibit_irq(s, true);
5659             } else {
5660                 gen_eob(s);
5661             }
5662         }
5663         break;
5664     case 0x1a1: /* pop fs */
5665     case 0x1a9: /* pop gs */
5666         ot = gen_pop_T0(s);
5667         gen_movl_seg_T0(s, (b >> 3) & 7);
5668         gen_pop_update(s, ot);
5669         if (s->base.is_jmp) {
5670             gen_jmp_im(s, s->pc - s->cs_base);
5671             gen_eob(s);
5672         }
5673         break;
5674 
5675         /**************************/
5676         /* mov */
5677     case 0x88:
5678     case 0x89: /* mov Gv, Ev */
5679         ot = mo_b_d(b, dflag);
5680         modrm = x86_ldub_code(env, s);
5681         reg = ((modrm >> 3) & 7) | REX_R(s);
5682 
5683         /* generate a generic store */
5684         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5685         break;
5686     case 0xc6:
5687     case 0xc7: /* mov Ev, Iv */
5688         ot = mo_b_d(b, dflag);
5689         modrm = x86_ldub_code(env, s);
5690         mod = (modrm >> 6) & 3;
5691         if (mod != 3) {
5692             s->rip_offset = insn_const_size(ot);
5693             gen_lea_modrm(env, s, modrm);
5694         }
5695         val = insn_get(env, s, ot);
5696         tcg_gen_movi_tl(s->T0, val);
5697         if (mod != 3) {
5698             gen_op_st_v(s, ot, s->T0, s->A0);
5699         } else {
5700             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5701         }
5702         break;
5703     case 0x8a:
5704     case 0x8b: /* mov Ev, Gv */
5705         ot = mo_b_d(b, dflag);
5706         modrm = x86_ldub_code(env, s);
5707         reg = ((modrm >> 3) & 7) | REX_R(s);
5708 
5709         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5710         gen_op_mov_reg_v(s, ot, reg, s->T0);
5711         break;
5712     case 0x8e: /* mov seg, Gv */
5713         modrm = x86_ldub_code(env, s);
5714         reg = (modrm >> 3) & 7;
5715         if (reg >= 6 || reg == R_CS)
5716             goto illegal_op;
5717         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5718         gen_movl_seg_T0(s, reg);
5719         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5720         if (s->base.is_jmp) {
5721             gen_jmp_im(s, s->pc - s->cs_base);
5722             if (reg == R_SS) {
5723                 s->flags &= ~HF_TF_MASK;
5724                 gen_eob_inhibit_irq(s, true);
5725             } else {
5726                 gen_eob(s);
5727             }
5728         }
5729         break;
5730     case 0x8c: /* mov Gv, seg */
5731         modrm = x86_ldub_code(env, s);
5732         reg = (modrm >> 3) & 7;
5733         mod = (modrm >> 6) & 3;
5734         if (reg >= 6)
5735             goto illegal_op;
5736         gen_op_movl_T0_seg(s, reg);
5737         ot = mod == 3 ? dflag : MO_16;
5738         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5739         break;
5740 
5741     case 0x1b6: /* movzbS Gv, Eb */
5742     case 0x1b7: /* movzwS Gv, Ew */
5743     case 0x1be: /* movsbS Gv, Eb */
5744     case 0x1bf: /* movswS Gv, Ew */
5745         {
5746             MemOp d_ot;
5747             MemOp s_ot;
5748 
5749             /* d_ot is the size of the destination */
5750             d_ot = dflag;
5751             /* ot is the size of the source */
5752             ot = (b & 1) + MO_8;
5753             /* s_ot is the sign and size of the source */
5754             s_ot = b & 8 ? MO_SIGN | ot : ot;
5755 
5756             modrm = x86_ldub_code(env, s);
5757             reg = ((modrm >> 3) & 7) | REX_R(s);
5758             mod = (modrm >> 6) & 3;
5759             rm = (modrm & 7) | REX_B(s);
5760 
5761             if (mod == 3) {
5762                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
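                         /* AH/CH/DH/BH: the value lives in bits 15:8 of the
                            corresponding low register, hence the sextract. */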
5763                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5764                 } else {
5765                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5766                     switch (s_ot) {
5767                     case MO_UB:
5768                         tcg_gen_ext8u_tl(s->T0, s->T0);
5769                         break;
5770                     case MO_SB:
5771                         tcg_gen_ext8s_tl(s->T0, s->T0);
5772                         break;
5773                     case MO_UW:
5774                         tcg_gen_ext16u_tl(s->T0, s->T0);
5775                         break;
5776                     default:
5777                     case MO_SW:
5778                         tcg_gen_ext16s_tl(s->T0, s->T0);
5779                         break;
5780                     }
5781                 }
5782                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5783             } else {
5784                 gen_lea_modrm(env, s, modrm);
5785                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5786                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5787             }
5788         }
5789         break;
5790 
5791     case 0x8d: /* lea */
5792         modrm = x86_ldub_code(env, s);
5793         mod = (modrm >> 6) & 3;
5794         if (mod == 3)
5795             goto illegal_op;
5796         reg = ((modrm >> 3) & 7) | REX_R(s);
5797         {
5798             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5799             TCGv ea = gen_lea_modrm_1(s, a);
5800             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5801             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5802         }
5803         break;
5804 
5805     case 0xa0: /* mov EAX, Ov */
5806     case 0xa1:
5807     case 0xa2: /* mov Ov, EAX */
5808     case 0xa3:
5809         {
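             /* These forms take a plain address-size offset (moffs) instead
                of a ModRM byte; with a 64-bit address size the offset is a
                full 8 bytes. */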
5810             target_ulong offset_addr;
5811 
5812             ot = mo_b_d(b, dflag);
5813             switch (s->aflag) {
5814 #ifdef TARGET_X86_64
5815             case MO_64:
5816                 offset_addr = x86_ldq_code(env, s);
5817                 break;
5818 #endif
5819             default:
5820                 offset_addr = insn_get(env, s, s->aflag);
5821                 break;
5822             }
5823             tcg_gen_movi_tl(s->A0, offset_addr);
5824             gen_add_A0_ds_seg(s);
5825             if ((b & 2) == 0) {
5826                 gen_op_ld_v(s, ot, s->T0, s->A0);
5827                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5828             } else {
5829                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5830                 gen_op_st_v(s, ot, s->T0, s->A0);
5831             }
5832         }
5833         break;
5834     case 0xd7: /* xlat */
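         /* AL = [seg:rBX + AL], with seg defaulting to DS (overridable). */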
5835         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5836         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5837         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5838         gen_extu(s->aflag, s->A0);
5839         gen_add_A0_ds_seg(s);
5840         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5841         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5842         break;
5843     case 0xb0 ... 0xb7: /* mov R, Ib */
5844         val = insn_get(env, s, MO_8);
5845         tcg_gen_movi_tl(s->T0, val);
5846         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5847         break;
5848     case 0xb8 ... 0xbf: /* mov R, Iv */
5849 #ifdef TARGET_X86_64
5850         if (dflag == MO_64) {
5851             uint64_t tmp;
5852             /* 64 bit case */
5853             tmp = x86_ldq_code(env, s);
5854             reg = (b & 7) | REX_B(s);
5855             tcg_gen_movi_tl(s->T0, tmp);
5856             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5857         } else
5858 #endif
5859         {
5860             ot = dflag;
5861             val = insn_get(env, s, ot);
5862             reg = (b & 7) | REX_B(s);
5863             tcg_gen_movi_tl(s->T0, val);
5864             gen_op_mov_reg_v(s, ot, reg, s->T0);
5865         }
5866         break;
5867 
5868     case 0x91 ... 0x97: /* xchg R, EAX */
5869     do_xchg_reg_eax:
5870         ot = dflag;
5871         reg = (b & 7) | REX_B(s);
5872         rm = R_EAX;
5873         goto do_xchg_reg;
5874     case 0x86:
5875     case 0x87: /* xchg Ev, Gv */
5876         ot = mo_b_d(b, dflag);
5877         modrm = x86_ldub_code(env, s);
5878         reg = ((modrm >> 3) & 7) | REX_R(s);
5879         mod = (modrm >> 6) & 3;
5880         if (mod == 3) {
5881             rm = (modrm & 7) | REX_B(s);
5882         do_xchg_reg:
5883             gen_op_mov_v_reg(s, ot, s->T0, reg);
5884             gen_op_mov_v_reg(s, ot, s->T1, rm);
5885             gen_op_mov_reg_v(s, ot, rm, s->T0);
5886             gen_op_mov_reg_v(s, ot, reg, s->T1);
5887         } else {
5888             gen_lea_modrm(env, s, modrm);
5889             gen_op_mov_v_reg(s, ot, s->T0, reg);
5890             /* for xchg, lock is implicit */
5891             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5892                                    s->mem_index, ot | MO_LE);
5893             gen_op_mov_reg_v(s, ot, reg, s->T1);
5894         }
5895         break;
5896     case 0xc4: /* les Gv */
5897         /* In CODE64 this is VEX3; see above.  */
5898         op = R_ES;
5899         goto do_lxx;
5900     case 0xc5: /* lds Gv */
5901         /* In CODE64 this is VEX2; see above.  */
5902         op = R_DS;
5903         goto do_lxx;
5904     case 0x1b2: /* lss Gv */
5905         op = R_SS;
5906         goto do_lxx;
5907     case 0x1b4: /* lfs Gv */
5908         op = R_FS;
5909         goto do_lxx;
5910     case 0x1b5: /* lgs Gv */
5911         op = R_GS;
5912     do_lxx:
5913         ot = dflag != MO_16 ? MO_32 : MO_16;
5914         modrm = x86_ldub_code(env, s);
5915         reg = ((modrm >> 3) & 7) | REX_R(s);
5916         mod = (modrm >> 6) & 3;
5917         if (mod == 3)
5918             goto illegal_op;
5919         gen_lea_modrm(env, s, modrm);
5920         gen_op_ld_v(s, ot, s->T1, s->A0);
5921         gen_add_A0_im(s, 1 << ot);
5922         /* load the segment first to handle exceptions properly */
5923         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5924         gen_movl_seg_T0(s, op);
5925         /* then put the data */
5926         gen_op_mov_reg_v(s, ot, reg, s->T1);
5927         if (s->base.is_jmp) {
5928             gen_jmp_im(s, s->pc - s->cs_base);
5929             gen_eob(s);
5930         }
5931         break;
5932 
5933         /************************/
5934         /* shifts */
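         /* The GRP2 shift forms share the decoder at "grp2"; 'shift'
            selects the count operand: 0 = CL, 1 = the constant 1, 2 = an
            imm8 fetched after any memory operand. */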
5935     case 0xc0:
5936     case 0xc1:
5937         /* shift Ev,Ib */
5938         shift = 2;
5939     grp2:
5940         {
5941             ot = mo_b_d(b, dflag);
5942             modrm = x86_ldub_code(env, s);
5943             mod = (modrm >> 6) & 3;
5944             op = (modrm >> 3) & 7;
5945 
5946             if (mod != 3) {
5947                 if (shift == 2) {
5948                     s->rip_offset = 1;
5949                 }
5950                 gen_lea_modrm(env, s, modrm);
5951                 opreg = OR_TMP0;
5952             } else {
5953                 opreg = (modrm & 7) | REX_B(s);
5954             }
5955 
5956             /* shift == 0 uses the count in CL; otherwise the count is an immediate */
5957             if (shift == 0) {
5958                 gen_shift(s, op, ot, opreg, OR_ECX);
5959             } else {
5960                 if (shift == 2) {
5961                     shift = x86_ldub_code(env, s);
5962                 }
5963                 gen_shifti(s, op, ot, opreg, shift);
5964             }
5965         }
5966         break;
5967     case 0xd0:
5968     case 0xd1:
5969         /* shift Ev,1 */
5970         shift = 1;
5971         goto grp2;
5972     case 0xd2:
5973     case 0xd3:
5974         /* shift Ev,cl */
5975         shift = 0;
5976         goto grp2;
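             /*
              * Double-precision shifts: `op` selects the direction
              * (0 = shld, 1 = shrd) and `shift` the count source
              * (1 = immediate byte, 0 = the CL register).
              */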
5977 
5978     case 0x1a4: /* shld imm */
5979         op = 0;
5980         shift = 1;
5981         goto do_shiftd;
5982     case 0x1a5: /* shld cl */
5983         op = 0;
5984         shift = 0;
5985         goto do_shiftd;
5986     case 0x1ac: /* shrd imm */
5987         op = 1;
5988         shift = 1;
5989         goto do_shiftd;
5990     case 0x1ad: /* shrd cl */
5991         op = 1;
5992         shift = 0;
5993     do_shiftd:
5994         ot = dflag;
5995         modrm = x86_ldub_code(env, s);
5996         mod = (modrm >> 6) & 3;
5997         rm = (modrm & 7) | REX_B(s);
5998         reg = ((modrm >> 3) & 7) | REX_R(s);
5999         if (mod != 3) {
6000             gen_lea_modrm(env, s, modrm);
6001             opreg = OR_TMP0;
6002         } else {
6003             opreg = rm;
6004         }
6005         gen_op_mov_v_reg(s, ot, s->T1, reg);
6006 
6007         if (shift) {
6008             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
6009             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
6010             tcg_temp_free(imm);
6011         } else {
6012             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
6013         }
6014         break;
6015 
6016         /************************/
6017         /* floats */
6018     case 0xd8 ... 0xdf:
6019         {
6020             bool update_fip = true;
6021 
6022             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
6023                 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
6024                 /* XXX: what to do if illegal op? */
6025                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6026                 break;
6027             }
6028             modrm = x86_ldub_code(env, s);
6029             mod = (modrm >> 6) & 3;
6030             rm = modrm & 7;
6031             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
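                 /* op packs the low 3 opcode bits (d8..df) with the modrm
                    reg field into a 6-bit index for the switches below. */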
6032             if (mod != 3) {
6033                 /* memory op */
6034                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
6035                 TCGv ea = gen_lea_modrm_1(s, a);
6036                 TCGv last_addr = tcg_temp_new();
6037                 bool update_fdp = true;
6038 
6039                 tcg_gen_mov_tl(last_addr, ea);
6040                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
6041 
6042                 switch (op) {
6043                 case 0x00 ... 0x07: /* fxxxs */
6044                 case 0x10 ... 0x17: /* fixxxl */
6045                 case 0x20 ... 0x27: /* fxxxl */
6046                 case 0x30 ... 0x37: /* fixxx */
6047                     {
6048                         int op1;
6049                         op1 = op & 7;
6050 
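                             /* op >> 4 selects the operand format:
                                0 = float32, 1 = int32, 2 = float64,
                                3 = int16 */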
6051                         switch (op >> 4) {
6052                         case 0:
6053                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6054                                                 s->mem_index, MO_LEUL);
6055                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
6056                             break;
6057                         case 1:
6058                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6059                                                 s->mem_index, MO_LEUL);
6060                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
6061                             break;
6062                         case 2:
6063                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6064                                                 s->mem_index, MO_LEUQ);
6065                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
6066                             break;
6067                         case 3:
6068                         default:
6069                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6070                                                 s->mem_index, MO_LESW);
6071                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
6072                             break;
6073                         }
6074 
6075                         gen_helper_fp_arith_ST0_FT0(op1);
6076                         if (op1 == 3) {
6077                             /* fcomp needs pop */
6078                             gen_helper_fpop(cpu_env);
6079                         }
6080                     }
6081                     break;
6082                 case 0x08: /* flds */
6083                 case 0x0a: /* fsts */
6084                 case 0x0b: /* fstps */
6085                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
6086                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
6087                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
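                         /* op & 7: 0 = load, 1 = fisttp, others = store,
                            popping when (op & 7) == 3; op >> 4 again
                            selects the operand format. */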
6088                     switch (op & 7) {
6089                     case 0:
6090                         switch (op >> 4) {
6091                         case 0:
6092                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6093                                                 s->mem_index, MO_LEUL);
6094                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
6095                             break;
6096                         case 1:
6097                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6098                                                 s->mem_index, MO_LEUL);
6099                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6100                             break;
6101                         case 2:
6102                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6103                                                 s->mem_index, MO_LEUQ);
6104                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
6105                             break;
6106                         case 3:
6107                         default:
6108                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6109                                                 s->mem_index, MO_LESW);
6110                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6111                             break;
6112                         }
6113                         break;
6114                     case 1:
6115                         /* XXX: the corresponding CPUID bit must be tested! */
6116                         switch (op >> 4) {
6117                         case 1:
6118                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6119                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6120                                                 s->mem_index, MO_LEUL);
6121                             break;
6122                         case 2:
6123                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6124                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6125                                                 s->mem_index, MO_LEUQ);
6126                             break;
6127                         case 3:
6128                         default:
6129                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6130                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6131                                                 s->mem_index, MO_LEUW);
6132                             break;
6133                         }
6134                         gen_helper_fpop(cpu_env);
6135                         break;
6136                     default:
6137                         switch (op >> 4) {
6138                         case 0:
6139                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6140                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6141                                                 s->mem_index, MO_LEUL);
6142                             break;
6143                         case 1:
6144                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6145                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6146                                                 s->mem_index, MO_LEUL);
6147                             break;
6148                         case 2:
6149                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6150                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6151                                                 s->mem_index, MO_LEUQ);
6152                             break;
6153                         case 3:
6154                         default:
6155                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6156                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6157                                                 s->mem_index, MO_LEUW);
6158                             break;
6159                         }
6160                         if ((op & 7) == 3) {
6161                             gen_helper_fpop(cpu_env);
6162                         }
6163                         break;
6164                     }
6165                     break;
6166                 case 0x0c: /* fldenv mem */
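                     /* dflag - 1 tells the helper which environment layout
                        to use: 0 means 16-bit, nonzero the 32-bit one */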
6167                     gen_helper_fldenv(cpu_env, s->A0,
6168                                       tcg_const_i32(dflag - 1));
6169                     update_fip = update_fdp = false;
6170                     break;
6171                 case 0x0d: /* fldcw mem */
6172                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6173                                         s->mem_index, MO_LEUW);
6174                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6175                     update_fip = update_fdp = false;
6176                     break;
6177                 case 0x0e: /* fnstenv mem */
6178                     gen_helper_fstenv(cpu_env, s->A0,
6179                                       tcg_const_i32(dflag - 1));
6180                     update_fip = update_fdp = false;
6181                     break;
6182                 case 0x0f: /* fnstcw mem */
6183                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6184                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6185                                         s->mem_index, MO_LEUW);
6186                     update_fip = update_fdp = false;
6187                     break;
6188                 case 0x1d: /* fldt mem */
6189                     gen_helper_fldt_ST0(cpu_env, s->A0);
6190                     break;
6191                 case 0x1f: /* fstpt mem */
6192                     gen_helper_fstt_ST0(cpu_env, s->A0);
6193                     gen_helper_fpop(cpu_env);
6194                     break;
6195                 case 0x2c: /* frstor mem */
6196                     gen_helper_frstor(cpu_env, s->A0,
6197                                       tcg_const_i32(dflag - 1));
6198                     update_fip = update_fdp = false;
6199                     break;
6200                 case 0x2e: /* fnsave mem */
6201                     gen_helper_fsave(cpu_env, s->A0,
6202                                      tcg_const_i32(dflag - 1));
6203                     update_fip = update_fdp = false;
6204                     break;
6205                 case 0x2f: /* fnstsw mem */
6206                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6207                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6208                                         s->mem_index, MO_LEUW);
6209                     update_fip = update_fdp = false;
6210                     break;
6211                 case 0x3c: /* fbld */
6212                     gen_helper_fbld_ST0(cpu_env, s->A0);
6213                     break;
6214                 case 0x3e: /* fbstp */
6215                     gen_helper_fbst_ST0(cpu_env, s->A0);
6216                     gen_helper_fpop(cpu_env);
6217                     break;
6218                 case 0x3d: /* fildll */
6219                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6220                                         s->mem_index, MO_LEUQ);
6221                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6222                     break;
6223                 case 0x3f: /* fistpll */
6224                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6225                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6226                                         s->mem_index, MO_LEUQ);
6227                     gen_helper_fpop(cpu_env);
6228                     break;
6229                 default:
6230                     goto unknown_op;
6231                 }
6232 
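                     /* Record the FPU data pointer: the segment selector of
                        the last memory operand goes to fpds, its offset to
                        fpdp. */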
6233                 if (update_fdp) {
6234                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6235 
6236                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6237                                    offsetof(CPUX86State,
6238                                             segs[last_seg].selector));
6239                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6240                                      offsetof(CPUX86State, fpds));
6241                     tcg_gen_st_tl(last_addr, cpu_env,
6242                                   offsetof(CPUX86State, fpdp));
6243                 }
6244                 tcg_temp_free(last_addr);
6245             } else {
6246                 /* register float ops */
6247                 opreg = rm;
6248 
6249                 switch (op) {
6250                 case 0x08: /* fld sti */
6251                     gen_helper_fpush(cpu_env);
6252                     gen_helper_fmov_ST0_STN(cpu_env,
6253                                             tcg_const_i32((opreg + 1) & 7));
6254                     break;
6255                 case 0x09: /* fxchg sti */
6256                 case 0x29: /* fxchg4 sti, undocumented op */
6257                 case 0x39: /* fxchg7 sti, undocumented op */
6258                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6259                     break;
6260                 case 0x0a: /* grp d9/2 */
6261                     switch (rm) {
6262                     case 0: /* fnop */
6263                         /* check exceptions (FreeBSD FPU probe) */
6264                         gen_helper_fwait(cpu_env);
6265                         update_fip = false;
6266                         break;
6267                     default:
6268                         goto unknown_op;
6269                     }
6270                     break;
6271                 case 0x0c: /* grp d9/4 */
6272                     switch (rm) {
6273                     case 0: /* fchs */
6274                         gen_helper_fchs_ST0(cpu_env);
6275                         break;
6276                     case 1: /* fabs */
6277                         gen_helper_fabs_ST0(cpu_env);
6278                         break;
6279                     case 4: /* ftst */
6280                         gen_helper_fldz_FT0(cpu_env);
6281                         gen_helper_fcom_ST0_FT0(cpu_env);
6282                         break;
6283                     case 5: /* fxam */
6284                         gen_helper_fxam_ST0(cpu_env);
6285                         break;
6286                     default:
6287                         goto unknown_op;
6288                     }
6289                     break;
6290                 case 0x0d: /* grp d9/5 */
6291                     {
6292                         switch (rm) {
6293                         case 0:
6294                             gen_helper_fpush(cpu_env);
6295                             gen_helper_fld1_ST0(cpu_env);
6296                             break;
6297                         case 1:
6298                             gen_helper_fpush(cpu_env);
6299                             gen_helper_fldl2t_ST0(cpu_env);
6300                             break;
6301                         case 2:
6302                             gen_helper_fpush(cpu_env);
6303                             gen_helper_fldl2e_ST0(cpu_env);
6304                             break;
6305                         case 3:
6306                             gen_helper_fpush(cpu_env);
6307                             gen_helper_fldpi_ST0(cpu_env);
6308                             break;
6309                         case 4:
6310                             gen_helper_fpush(cpu_env);
6311                             gen_helper_fldlg2_ST0(cpu_env);
6312                             break;
6313                         case 5:
6314                             gen_helper_fpush(cpu_env);
6315                             gen_helper_fldln2_ST0(cpu_env);
6316                             break;
6317                         case 6:
6318                             gen_helper_fpush(cpu_env);
6319                             gen_helper_fldz_ST0(cpu_env);
6320                             break;
6321                         default:
6322                             goto unknown_op;
6323                         }
6324                     }
6325                     break;
6326                 case 0x0e: /* grp d9/6 */
6327                     switch (rm) {
6328                     case 0: /* f2xm1 */
6329                         gen_helper_f2xm1(cpu_env);
6330                         break;
6331                     case 1: /* fyl2x */
6332                         gen_helper_fyl2x(cpu_env);
6333                         break;
6334                     case 2: /* fptan */
6335                         gen_helper_fptan(cpu_env);
6336                         break;
6337                     case 3: /* fpatan */
6338                         gen_helper_fpatan(cpu_env);
6339                         break;
6340                     case 4: /* fxtract */
6341                         gen_helper_fxtract(cpu_env);
6342                         break;
6343                     case 5: /* fprem1 */
6344                         gen_helper_fprem1(cpu_env);
6345                         break;
6346                     case 6: /* fdecstp */
6347                         gen_helper_fdecstp(cpu_env);
6348                         break;
6349                     default:
6350                     case 7: /* fincstp */
6351                         gen_helper_fincstp(cpu_env);
6352                         break;
6353                     }
6354                     break;
6355                 case 0x0f: /* grp d9/7 */
6356                     switch (rm) {
6357                     case 0: /* fprem */
6358                         gen_helper_fprem(cpu_env);
6359                         break;
6360                     case 1: /* fyl2xp1 */
6361                         gen_helper_fyl2xp1(cpu_env);
6362                         break;
6363                     case 2: /* fsqrt */
6364                         gen_helper_fsqrt(cpu_env);
6365                         break;
6366                     case 3: /* fsincos */
6367                         gen_helper_fsincos(cpu_env);
6368                         break;
6369                     case 5: /* fscale */
6370                         gen_helper_fscale(cpu_env);
6371                         break;
6372                     case 4: /* frndint */
6373                         gen_helper_frndint(cpu_env);
6374                         break;
6375                     case 6: /* fsin */
6376                         gen_helper_fsin(cpu_env);
6377                         break;
6378                     default:
6379                     case 7: /* fcos */
6380                         gen_helper_fcos(cpu_env);
6381                         break;
6382                     }
6383                     break;
6384                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6385                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6386                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6387                     {
6388                         int op1;
6389 
6390                         op1 = op & 7;
6391                         if (op >= 0x20) {
6392                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6393                             if (op >= 0x30) {
6394                                 gen_helper_fpop(cpu_env);
6395                             }
6396                         } else {
6397                             gen_helper_fmov_FT0_STN(cpu_env,
6398                                                     tcg_const_i32(opreg));
6399                             gen_helper_fp_arith_ST0_FT0(op1);
6400                         }
6401                     }
6402                     break;
6403                 case 0x02: /* fcom */
6404                 case 0x22: /* fcom2, undocumented op */
6405                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6406                     gen_helper_fcom_ST0_FT0(cpu_env);
6407                     break;
6408                 case 0x03: /* fcomp */
6409                 case 0x23: /* fcomp3, undocumented op */
6410                 case 0x32: /* fcomp5, undocumented op */
6411                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6412                     gen_helper_fcom_ST0_FT0(cpu_env);
6413                     gen_helper_fpop(cpu_env);
6414                     break;
6415                 case 0x15: /* da/5 */
6416                     switch (rm) {
6417                     case 1: /* fucompp */
6418                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6419                         gen_helper_fucom_ST0_FT0(cpu_env);
6420                         gen_helper_fpop(cpu_env);
6421                         gen_helper_fpop(cpu_env);
6422                         break;
6423                     default:
6424                         goto unknown_op;
6425                     }
6426                     break;
6427                 case 0x1c:
6428                     switch (rm) {
6429                     case 0: /* feni (287 only, just do nop here) */
6430                         break;
6431                     case 1: /* fdisi (287 only, just do nop here) */
6432                         break;
6433                     case 2: /* fclex */
6434                         gen_helper_fclex(cpu_env);
6435                         update_fip = false;
6436                         break;
6437                     case 3: /* fninit */
6438                         gen_helper_fninit(cpu_env);
6439                         update_fip = false;
6440                         break;
6441                     case 4: /* fsetpm (287 only, just do nop here) */
6442                         break;
6443                     default:
6444                         goto unknown_op;
6445                     }
6446                     break;
6447                 case 0x1d: /* fucomi */
6448                     if (!(s->cpuid_features & CPUID_CMOV)) {
6449                         goto illegal_op;
6450                     }
6451                     gen_update_cc_op(s);
6452                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6453                     gen_helper_fucomi_ST0_FT0(cpu_env);
6454                     set_cc_op(s, CC_OP_EFLAGS);
6455                     break;
6456                 case 0x1e: /* fcomi */
6457                     if (!(s->cpuid_features & CPUID_CMOV)) {
6458                         goto illegal_op;
6459                     }
6460                     gen_update_cc_op(s);
6461                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6462                     gen_helper_fcomi_ST0_FT0(cpu_env);
6463                     set_cc_op(s, CC_OP_EFLAGS);
6464                     break;
6465                 case 0x28: /* ffree sti */
6466                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6467                     break;
6468                 case 0x2a: /* fst sti */
6469                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6470                     break;
6471                 case 0x2b: /* fstp sti */
6472                 case 0x0b: /* fstp1 sti, undocumented op */
6473                 case 0x3a: /* fstp8 sti, undocumented op */
6474                 case 0x3b: /* fstp9 sti, undocumented op */
6475                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6476                     gen_helper_fpop(cpu_env);
6477                     break;
6478                 case 0x2c: /* fucom st(i) */
6479                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6480                     gen_helper_fucom_ST0_FT0(cpu_env);
6481                     break;
6482                 case 0x2d: /* fucomp st(i) */
6483                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6484                     gen_helper_fucom_ST0_FT0(cpu_env);
6485                     gen_helper_fpop(cpu_env);
6486                     break;
6487                 case 0x33: /* de/3 */
6488                     switch (rm) {
6489                     case 1: /* fcompp */
6490                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6491                         gen_helper_fcom_ST0_FT0(cpu_env);
6492                         gen_helper_fpop(cpu_env);
6493                         gen_helper_fpop(cpu_env);
6494                         break;
6495                     default:
6496                         goto unknown_op;
6497                     }
6498                     break;
6499                 case 0x38: /* ffreep sti, undocumented op */
6500                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6501                     gen_helper_fpop(cpu_env);
6502                     break;
6503                 case 0x3c: /* df/4 */
6504                     switch (rm) {
6505                     case 0:
6506                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6507                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6508                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6509                         break;
6510                     default:
6511                         goto unknown_op;
6512                     }
6513                     break;
6514                 case 0x3d: /* fucomip */
6515                     if (!(s->cpuid_features & CPUID_CMOV)) {
6516                         goto illegal_op;
6517                     }
6518                     gen_update_cc_op(s);
6519                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6520                     gen_helper_fucomi_ST0_FT0(cpu_env);
6521                     gen_helper_fpop(cpu_env);
6522                     set_cc_op(s, CC_OP_EFLAGS);
6523                     break;
6524                 case 0x3e: /* fcomip */
6525                     if (!(s->cpuid_features & CPUID_CMOV)) {
6526                         goto illegal_op;
6527                     }
6528                     gen_update_cc_op(s);
6529                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6530                     gen_helper_fcomi_ST0_FT0(cpu_env);
6531                     gen_helper_fpop(cpu_env);
6532                     set_cc_op(s, CC_OP_EFLAGS);
6533                     break;
6534                 case 0x10 ... 0x13: /* fcmovxx */
6535                 case 0x18 ... 0x1b:
6536                     {
6537                         int op1;
6538                         TCGLabel *l1;
6539                         static const uint8_t fcmov_cc[8] = {
6540                             (JCC_B << 1),
6541                             (JCC_Z << 1),
6542                             (JCC_BE << 1),
6543                             (JCC_P << 1),
6544                         };
6545 
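                             /* Branch around the move on the inverse
                                condition; bit 3 of op distinguishes the
                                fcmovnxx forms, hence the XOR. */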
6546                         if (!(s->cpuid_features & CPUID_CMOV)) {
6547                             goto illegal_op;
6548                         }
6549                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6550                         l1 = gen_new_label();
6551                         gen_jcc1_noeob(s, op1, l1);
6552                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6553                         gen_set_label(l1);
6554                     }
6555                     break;
6556                 default:
6557                     goto unknown_op;
6558                 }
6559             }
6560 
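                 /* Record the FPU instruction pointer: the CS selector
                    goes to fpcs and this insn's EIP to fpip. */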
6561             if (update_fip) {
6562                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6563                                offsetof(CPUX86State, segs[R_CS].selector));
6564                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6565                                  offsetof(CPUX86State, fpcs));
6566                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6567                               cpu_env, offsetof(CPUX86State, fpip));
6568             }
6569         }
6570         break;
6571         /************************/
6572         /* string ops */
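             /*
              * With a REPZ/REPNZ prefix the gen_repz_* helpers generate
              * the whole rep loop; otherwise a single iteration is
              * emitted inline.
              */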
6573 
6574     case 0xa4: /* movsS */
6575     case 0xa5:
6576         ot = mo_b_d(b, dflag);
6577         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6578             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6579         } else {
6580             gen_movs(s, ot);
6581         }
6582         break;
6583 
6584     case 0xaa: /* stosS */
6585     case 0xab:
6586         ot = mo_b_d(b, dflag);
6587         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6588             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6589         } else {
6590             gen_stos(s, ot);
6591         }
6592         break;
6593     case 0xac: /* lodsS */
6594     case 0xad:
6595         ot = mo_b_d(b, dflag);
6596         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6597             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6598         } else {
6599             gen_lods(s, ot);
6600         }
6601         break;
6602     case 0xae: /* scasS */
6603     case 0xaf:
6604         ot = mo_b_d(b, dflag);
6605         if (prefixes & PREFIX_REPNZ) {
6606             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6607         } else if (prefixes & PREFIX_REPZ) {
6608             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6609         } else {
6610             gen_scas(s, ot);
6611         }
6612         break;
6613 
6614     case 0xa6: /* cmpsS */
6615     case 0xa7:
6616         ot = mo_b_d(b, dflag);
6617         if (prefixes & PREFIX_REPNZ) {
6618             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6619         } else if (prefixes & PREFIX_REPZ) {
6620             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6621         } else {
6622             gen_cmps(s, ot);
6623         }
6624         break;
6625     case 0x6c: /* insS */
6626     case 0x6d:
6627         ot = mo_b_d32(b, dflag);
6628         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6629         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6630         if (!gen_check_io(s, ot, s->tmp2_i32,
6631                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6632             break;
6633         }
6634         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6635             gen_io_start();
6636         }
6637         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6638             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6639             /* jump generated by gen_repz_ins */
6640         } else {
6641             gen_ins(s, ot);
6642             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6643                 gen_jmp(s, s->pc - s->cs_base);
6644             }
6645         }
6646         break;
6647     case 0x6e: /* outsS */
6648     case 0x6f:
6649         ot = mo_b_d32(b, dflag);
6650         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6651         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6652         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6653             break;
6654         }
6655         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6656             gen_io_start();
6657         }
6658         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6659             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6660             /* jump generated by gen_repz_outs */
6661         } else {
6662             gen_outs(s, ot);
6663             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6664                 gen_jmp(s, s->pc - s->cs_base);
6665             }
6666         }
6667         break;
6668 
6669         /************************/
6670         /* port I/O */
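             /*
              * When icount is enabled, each port access is preceded by
              * gen_io_start() and the TB ends right after it (gen_jmp)
              * so the instruction count stays exact.
              */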
6671 
6672     case 0xe4:
6673     case 0xe5:
6674         ot = mo_b_d32(b, dflag);
6675         val = x86_ldub_code(env, s);
6676         tcg_gen_movi_i32(s->tmp2_i32, val);
6677         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6678             break;
6679         }
6680         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6681             gen_io_start();
6682         }
6683         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6684         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6685         gen_bpt_io(s, s->tmp2_i32, ot);
6686         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6687             gen_jmp(s, s->pc - s->cs_base);
6688         }
6689         break;
6690     case 0xe6:
6691     case 0xe7:
6692         ot = mo_b_d32(b, dflag);
6693         val = x86_ldub_code(env, s);
6694         tcg_gen_movi_i32(s->tmp2_i32, val);
6695         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6696             break;
6697         }
6698         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6699             gen_io_start();
6700         }
6701         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6702         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6703         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6704         gen_bpt_io(s, s->tmp2_i32, ot);
6705         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6706             gen_jmp(s, s->pc - s->cs_base);
6707         }
6708         break;
6709     case 0xec:
6710     case 0xed:
6711         ot = mo_b_d32(b, dflag);
6712         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6713         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6714         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6715             break;
6716         }
6717         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6718             gen_io_start();
6719         }
6720         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6721         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6722         gen_bpt_io(s, s->tmp2_i32, ot);
6723         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6724             gen_jmp(s, s->pc - s->cs_base);
6725         }
6726         break;
6727     case 0xee:
6728     case 0xef:
6729         ot = mo_b_d32(b, dflag);
6730         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6731         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6732         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6733             break;
6734         }
6735         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6736             gen_io_start();
6737         }
6738         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6739         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6740         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6741         gen_bpt_io(s, s->tmp2_i32, ot);
6742         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6743             gen_jmp(s, s->pc - s->cs_base);
6744         }
6745         break;
6746 
6747         /************************/
6748         /* control */
6749     case 0xc2: /* ret im */
6750         val = x86_ldsw_code(env, s);
6751         ot = gen_pop_T0(s);
6752         gen_stack_update(s, val + (1 << ot));
6753         /* Note that gen_pop_T0 uses a zero-extending load.  */
6754         gen_op_jmp_v(s->T0);
6755         gen_bnd_jmp(s);
6756         gen_jr(s, s->T0);
6757         break;
6758     case 0xc3: /* ret */
6759         ot = gen_pop_T0(s);
6760         gen_pop_update(s, ot);
6761         /* Note that gen_pop_T0 uses a zero-extending load.  */
6762         gen_op_jmp_v(s->T0);
6763         gen_bnd_jmp(s);
6764         gen_jr(s, s->T0);
6765         break;
6766     case 0xca: /* lret im */
6767         val = x86_ldsw_code(env, s);
6768     do_lret:
6769         if (PE(s) && !VM86(s)) {
6770             gen_update_cc_op(s);
6771             gen_jmp_im(s, pc_start - s->cs_base);
6772             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6773                                       tcg_const_i32(val));
6774         } else {
6775             gen_stack_A0(s);
6776             /* pop offset */
6777             gen_op_ld_v(s, dflag, s->T0, s->A0);
6778             /* NOTE: updating EIP early is safe even if an
6779                exception occurs on the selector load below */
6780             gen_op_jmp_v(s->T0);
6781             /* pop selector */
6782             gen_add_A0_im(s, 1 << dflag);
6783             gen_op_ld_v(s, dflag, s->T0, s->A0);
6784             gen_op_movl_seg_T0_vm(s, R_CS);
6785             /* add stack offset */
6786             gen_stack_update(s, val + (2 << dflag));
6787         }
6788         gen_eob(s);
6789         break;
6790     case 0xcb: /* lret */
6791         val = 0;
6792         goto do_lret;
6793     case 0xcf: /* iret */
6794         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6795         if (!PE(s) || VM86(s)) {
6796             /* real mode or vm86 mode */
6797             if (!check_vm86_iopl(s)) {
6798                 break;
6799             }
6800             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6801         } else {
6802             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6803                                       tcg_const_i32(s->pc - s->cs_base));
6804         }
6805         set_cc_op(s, CC_OP_EFLAGS);
6806         gen_eob(s);
6807         break;
6808     case 0xe8: /* call im */
6809         {
6810             if (dflag != MO_16) {
6811                 tval = (int32_t)insn_get(env, s, MO_32);
6812             } else {
6813                 tval = (int16_t)insn_get(env, s, MO_16);
6814             }
6815             next_eip = s->pc - s->cs_base;
6816             tval += next_eip;
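                 /* the relative target is truncated to 16 bits for a
                    16-bit operand size, and to 32 bits outside 64-bit
                    mode */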
6817             if (dflag == MO_16) {
6818                 tval &= 0xffff;
6819             } else if (!CODE64(s)) {
6820                 tval &= 0xffffffff;
6821             }
6822             tcg_gen_movi_tl(s->T0, next_eip);
6823             gen_push_v(s, s->T0);
6824             gen_bnd_jmp(s);
6825             gen_jmp(s, tval);
6826         }
6827         break;
6828     case 0x9a: /* lcall im */
6829         {
6830             unsigned int selector, offset;
6831 
6832             if (CODE64(s))
6833                 goto illegal_op;
6834             ot = dflag;
6835             offset = insn_get(env, s, ot);
6836             selector = insn_get(env, s, MO_16);
6837 
6838             tcg_gen_movi_tl(s->T0, selector);
6839             tcg_gen_movi_tl(s->T1, offset);
6840         }
6841         goto do_lcall;
6842     case 0xe9: /* jmp im */
6843         if (dflag != MO_16) {
6844             tval = (int32_t)insn_get(env, s, MO_32);
6845         } else {
6846             tval = (int16_t)insn_get(env, s, MO_16);
6847         }
6848         tval += s->pc - s->cs_base;
6849         if (dflag == MO_16) {
6850             tval &= 0xffff;
6851         } else if (!CODE64(s)) {
6852             tval &= 0xffffffff;
6853         }
6854         gen_bnd_jmp(s);
6855         gen_jmp(s, tval);
6856         break;
6857     case 0xea: /* ljmp im */
6858         {
6859             unsigned int selector, offset;
6860 
6861             if (CODE64(s))
6862                 goto illegal_op;
6863             ot = dflag;
6864             offset = insn_get(env, s, ot);
6865             selector = insn_get(env, s, MO_16);
6866 
6867             tcg_gen_movi_tl(s->T0, selector);
6868             tcg_gen_movi_tl(s->T1, offset);
6869         }
6870         goto do_ljmp;
6871     case 0xeb: /* jmp Jb */
6872         tval = (int8_t)insn_get(env, s, MO_8);
6873         tval += s->pc - s->cs_base;
6874         if (dflag == MO_16) {
6875             tval &= 0xffff;
6876         }
6877         gen_jmp(s, tval);
6878         break;
6879     case 0x70 ... 0x7f: /* jcc Jb */
6880         tval = (int8_t)insn_get(env, s, MO_8);
6881         goto do_jcc;
6882     case 0x180 ... 0x18f: /* jcc Jv */
6883         if (dflag != MO_16) {
6884             tval = (int32_t)insn_get(env, s, MO_32);
6885         } else {
6886             tval = (int16_t)insn_get(env, s, MO_16);
6887         }
6888     do_jcc:
6889         next_eip = s->pc - s->cs_base;
6890         tval += next_eip;
6891         if (dflag == MO_16) {
6892             tval &= 0xffff;
6893         }
6894         gen_bnd_jmp(s);
6895         gen_jcc(s, b, tval, next_eip);
6896         break;
6897 
6898     case 0x190 ... 0x19f: /* setcc Gv */
6899         modrm = x86_ldub_code(env, s);
6900         gen_setcc1(s, b, s->T0);
6901         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6902         break;
6903     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6904         if (!(s->cpuid_features & CPUID_CMOV)) {
6905             goto illegal_op;
6906         }
6907         ot = dflag;
6908         modrm = x86_ldub_code(env, s);
6909         reg = ((modrm >> 3) & 7) | REX_R(s);
6910         gen_cmovcc1(env, s, ot, b, modrm, reg);
6911         break;
6912 
6913         /************************/
6914         /* flags */
6915     case 0x9c: /* pushf */
6916         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6917         if (check_vm86_iopl(s)) {
6918             gen_update_cc_op(s);
6919             gen_helper_read_eflags(s->T0, cpu_env);
6920             gen_push_v(s, s->T0);
6921         }
6922         break;
6923     case 0x9d: /* popf */
6924         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6925         if (check_vm86_iopl(s)) {
6926             ot = gen_pop_T0(s);
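                 /* The writable EFLAGS bits depend on privilege: CPL 0
                    may also change IF and IOPL, CPL <= IOPL only IF,
                    otherwise neither; 16-bit forms touch only the low
                    word. */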
6927             if (CPL(s) == 0) {
6928                 if (dflag != MO_16) {
6929                     gen_helper_write_eflags(cpu_env, s->T0,
6930                                             tcg_const_i32((TF_MASK | AC_MASK |
6931                                                            ID_MASK | NT_MASK |
6932                                                            IF_MASK |
6933                                                            IOPL_MASK)));
6934                 } else {
6935                     gen_helper_write_eflags(cpu_env, s->T0,
6936                                             tcg_const_i32((TF_MASK | AC_MASK |
6937                                                            ID_MASK | NT_MASK |
6938                                                            IF_MASK | IOPL_MASK)
6939                                                           & 0xffff));
6940                 }
6941             } else {
6942                 if (CPL(s) <= IOPL(s)) {
6943                     if (dflag != MO_16) {
6944                         gen_helper_write_eflags(cpu_env, s->T0,
6945                                                 tcg_const_i32((TF_MASK |
6946                                                                AC_MASK |
6947                                                                ID_MASK |
6948                                                                NT_MASK |
6949                                                                IF_MASK)));
6950                     } else {
6951                         gen_helper_write_eflags(cpu_env, s->T0,
6952                                                 tcg_const_i32((TF_MASK |
6953                                                                AC_MASK |
6954                                                                ID_MASK |
6955                                                                NT_MASK |
6956                                                                IF_MASK)
6957                                                               & 0xffff));
6958                     }
6959                 } else {
6960                     if (dflag != MO_16) {
6961                         gen_helper_write_eflags(cpu_env, s->T0,
6962                                            tcg_const_i32((TF_MASK | AC_MASK |
6963                                                           ID_MASK | NT_MASK)));
6964                     } else {
6965                         gen_helper_write_eflags(cpu_env, s->T0,
6966                                            tcg_const_i32((TF_MASK | AC_MASK |
6967                                                           ID_MASK | NT_MASK)
6968                                                          & 0xffff));
6969                     }
6970                 }
6971             }
6972             gen_pop_update(s, ot);
6973             set_cc_op(s, CC_OP_EFLAGS);
6974             /* abort translation because TF/AC flag may change */
6975             gen_jmp_im(s, s->pc - s->cs_base);
6976             gen_eob(s);
6977         }
6978         break;
6979     case 0x9e: /* sahf */
6980         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6981             goto illegal_op;
6982         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6983         gen_compute_eflags(s);
6984         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6985         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6986         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6987         break;
6988     case 0x9f: /* lahf */
6989         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6990             goto illegal_op;
6991         gen_compute_eflags(s);
6992         /* Note: gen_compute_eflags() only gives the condition codes */
6993         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6994         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6995         break;
6996     case 0xf5: /* cmc */
6997         gen_compute_eflags(s);
6998         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6999         break;
7000     case 0xf8: /* clc */
7001         gen_compute_eflags(s);
7002         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
7003         break;
7004     case 0xf9: /* stc */
7005         gen_compute_eflags(s);
7006         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
7007         break;
7008     case 0xfc: /* cld */
7009         tcg_gen_movi_i32(s->tmp2_i32, 1);
7010         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
7011         break;
7012     case 0xfd: /* std */
7013         tcg_gen_movi_i32(s->tmp2_i32, -1);
7014         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
7015         break;
7016 
7017         /************************/
7018         /* bit operations */
7019     case 0x1ba: /* bt/bts/btr/btc Gv, im */
7020         ot = dflag;
7021         modrm = x86_ldub_code(env, s);
7022         op = (modrm >> 3) & 7;
7023         mod = (modrm >> 6) & 3;
7024         rm = (modrm & 7) | REX_B(s);
7025         if (mod != 3) {
7026             s->rip_offset = 1;
7027             gen_lea_modrm(env, s, modrm);
7028             if (!(s->prefix & PREFIX_LOCK)) {
7029                 gen_op_ld_v(s, ot, s->T0, s->A0);
7030             }
7031         } else {
7032             gen_op_mov_v_reg(s, ot, s->T0, rm);
7033         }
7034         /* load shift */
7035         val = x86_ldub_code(env, s);
7036         tcg_gen_movi_tl(s->T1, val);
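             /* the reg field selects the operation: 4 = bt, 5 = bts,
                6 = btr, 7 = btc; values 0..3 are undefined here */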
7037         if (op < 4)
7038             goto unknown_op;
7039         op -= 4;
7040         goto bt_op;
7041     case 0x1a3: /* bt Gv, Ev */
7042         op = 0;
7043         goto do_btx;
7044     case 0x1ab: /* bts */
7045         op = 1;
7046         goto do_btx;
7047     case 0x1b3: /* btr */
7048         op = 2;
7049         goto do_btx;
7050     case 0x1bb: /* btc */
7051         op = 3;
7052     do_btx:
7053         ot = dflag;
7054         modrm = x86_ldub_code(env, s);
7055         reg = ((modrm >> 3) & 7) | REX_R(s);
7056         mod = (modrm >> 6) & 3;
7057         rm = (modrm & 7) | REX_B(s);
7058         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
7059         if (mod != 3) {
7060             AddressParts a = gen_lea_modrm_0(env, s, modrm);
7061             /* register bit offset: displace the EA by (bitoff >> (3 + ot)) operand-size words */
7062             gen_exts(ot, s->T1);
7063             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
7064             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
7065             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
7066             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7067             if (!(s->prefix & PREFIX_LOCK)) {
7068                 gen_op_ld_v(s, ot, s->T0, s->A0);
7069             }
7070         } else {
7071             gen_op_mov_v_reg(s, ot, s->T0, rm);
7072         }
7073     bt_op:
7074         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
7075         tcg_gen_movi_tl(s->tmp0, 1);
7076         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
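             /* tmp0 = 1 << (bit offset modulo the operand width) */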
7077         if (s->prefix & PREFIX_LOCK) {
7078             switch (op) {
7079             case 0: /* bt */
7080                 /* Needs no atomic ops; we suppressed the normal
7081                    memory load for LOCK above so do it now.  */
7082                 gen_op_ld_v(s, ot, s->T0, s->A0);
7083                 break;
7084             case 1: /* bts */
7085                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
7086                                            s->mem_index, ot | MO_LE);
7087                 break;
7088             case 2: /* btr */
7089                 tcg_gen_not_tl(s->tmp0, s->tmp0);
7090                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
7091                                             s->mem_index, ot | MO_LE);
7092                 break;
7093             default:
7094             case 3: /* btc */
7095                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
7096                                             s->mem_index, ot | MO_LE);
7097                 break;
7098             }
7099             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7100         } else {
7101             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7102             switch (op) {
7103             case 0: /* bt */
7104                 /* Data already loaded; nothing to do.  */
7105                 break;
7106             case 1: /* bts */
7107                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7108                 break;
7109             case 2: /* btr */
7110                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7111                 break;
7112             default:
7113             case 3: /* btc */
7114                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7115                 break;
7116             }
7117             if (op != 0) {
7118                 if (mod != 3) {
7119                     gen_op_st_v(s, ot, s->T0, s->A0);
7120                 } else {
7121                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7122                 }
7123             }
7124         }
7125 
7126         /* Delay all CC updates until after the store above.  Note that
7127            C is the result of the test, Z is unchanged, and the others
7128            are all undefined.  */
7129         switch (s->cc_op) {
7130         case CC_OP_MULB ... CC_OP_MULQ:
7131         case CC_OP_ADDB ... CC_OP_ADDQ:
7132         case CC_OP_ADCB ... CC_OP_ADCQ:
7133         case CC_OP_SUBB ... CC_OP_SUBQ:
7134         case CC_OP_SBBB ... CC_OP_SBBQ:
7135         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7136         case CC_OP_INCB ... CC_OP_INCQ:
7137         case CC_OP_DECB ... CC_OP_DECQ:
7138         case CC_OP_SHLB ... CC_OP_SHLQ:
7139         case CC_OP_SARB ... CC_OP_SARQ:
7140         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7141             /* Z was going to be computed from the non-zero status of CC_DST.
7142                We can get that same Z value (and the new C value) by leaving
7143                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7144                same width.  */
7145             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7146             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7147             break;
7148         default:
7149             /* Otherwise, generate EFLAGS and replace the C bit.  */
7150             gen_compute_eflags(s);
7151             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7152                                ctz32(CC_C), 1);
7153             break;
7154         }
7155         break;
7156     case 0x1bc: /* bsf / tzcnt */
7157     case 0x1bd: /* bsr / lzcnt */
7158         ot = dflag;
7159         modrm = x86_ldub_code(env, s);
7160         reg = ((modrm >> 3) & 7) | REX_R(s);
7161         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7162         gen_extu(ot, s->T0);
7163 
7164         /* Note that lzcnt and tzcnt are in different extensions.  */
7165         if ((prefixes & PREFIX_REPZ)
7166             && (b & 1
7167                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7168                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7169             int size = 8 << ot;
7170             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7171             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7172             if (b & 1) {
7173                 /* For lzcnt, reduce the target_ulong result by the
7174                    number of zeros that we expect to find at the top.  */
7175                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7176                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7177             } else {
7178                 /* For tzcnt, a zero input must return the operand size.  */
7179                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7180             }
7181             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7182             gen_op_update1_cc(s);
7183             set_cc_op(s, CC_OP_BMILGB + ot);
7184         } else {
7185             /* For bsr/bsf, only the Z bit is defined and it is related
7186                to the input and not the result.  */
7187             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7188             set_cc_op(s, CC_OP_LOGICB + ot);
7189 
7190             /* ??? The manual says that the output is undefined when the
7191                input is zero, but real hardware leaves it unchanged, and
7192                real programs appear to depend on that.  Accomplish this
7193                by passing the output as the value to return upon zero.  */
7194             if (b & 1) {
7195                 /* For bsr, return the bit index of the first 1 bit,
7196                    not the count of leading zeros.  */
7197                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7198                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7199                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7200             } else {
7201                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7202             }
7203         }
7204         gen_op_mov_reg_v(s, ot, reg, s->T0);
7205         break;
7206         /************************/
7207         /* bcd */
7208     case 0x27: /* daa */
7209         if (CODE64(s))
7210             goto illegal_op;
7211         gen_update_cc_op(s);
7212         gen_helper_daa(cpu_env);
7213         set_cc_op(s, CC_OP_EFLAGS);
7214         break;
7215     case 0x2f: /* das */
7216         if (CODE64(s))
7217             goto illegal_op;
7218         gen_update_cc_op(s);
7219         gen_helper_das(cpu_env);
7220         set_cc_op(s, CC_OP_EFLAGS);
7221         break;
7222     case 0x37: /* aaa */
7223         if (CODE64(s))
7224             goto illegal_op;
7225         gen_update_cc_op(s);
7226         gen_helper_aaa(cpu_env);
7227         set_cc_op(s, CC_OP_EFLAGS);
7228         break;
7229     case 0x3f: /* aas */
7230         if (CODE64(s))
7231             goto illegal_op;
7232         gen_update_cc_op(s);
7233         gen_helper_aas(cpu_env);
7234         set_cc_op(s, CC_OP_EFLAGS);
7235         break;
7236     case 0xd4: /* aam */
7237         if (CODE64(s))
7238             goto illegal_op;
7239         val = x86_ldub_code(env, s);
7240         if (val == 0) {
7241             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7242         } else {
7243             gen_helper_aam(cpu_env, tcg_const_i32(val));
7244             set_cc_op(s, CC_OP_LOGICB);
7245         }
7246         break;
7247     case 0xd5: /* aad */
7248         if (CODE64(s))
7249             goto illegal_op;
7250         val = x86_ldub_code(env, s);
7251         gen_helper_aad(cpu_env, tcg_const_i32(val));
7252         set_cc_op(s, CC_OP_LOGICB);
7253         break;
7254         /************************/
7255         /* misc */
7256     case 0x90: /* nop */
7257         /* XXX: correct lock test for all insn */
7258         if (prefixes & PREFIX_LOCK) {
7259             goto illegal_op;
7260         }
7261         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7262         if (REX_B(s)) {
7263             goto do_xchg_reg_eax;
7264         }
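        /* rep nop (F3 90) is PAUSE: the helper does not return to this
           TB (hence DISAS_NORETURN), giving other vCPUs a chance to run.  */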
7265         if (prefixes & PREFIX_REPZ) {
7266             gen_update_cc_op(s);
7267             gen_jmp_im(s, pc_start - s->cs_base);
7268             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7269             s->base.is_jmp = DISAS_NORETURN;
7270         }
7271         break;
7272     case 0x9b: /* fwait */
7273         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7274             (HF_MP_MASK | HF_TS_MASK)) {
7275             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7276         } else {
7277             gen_helper_fwait(cpu_env);
7278         }
7279         break;
7280     case 0xcc: /* int3 */
7281         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7282         break;
7283     case 0xcd: /* int N */
7284         val = x86_ldub_code(env, s);
7285         if (check_vm86_iopl(s)) {
7286             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7287         }
7288         break;
7289     case 0xce: /* into */
7290         if (CODE64(s))
7291             goto illegal_op;
7292         gen_update_cc_op(s);
7293         gen_jmp_im(s, pc_start - s->cs_base);
7294         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7295         break;
7296 #ifdef WANT_ICEBP
7297     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7298         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7299         gen_debug(s);
7300         break;
7301 #endif
7302     case 0xfa: /* cli */
7303         if (check_iopl(s)) {
7304             gen_helper_cli(cpu_env);
7305         }
7306         break;
7307     case 0xfb: /* sti */
7308         if (check_iopl(s)) {
7309             gen_helper_sti(cpu_env);
7310             /* interrupts are recognized only after the insn following sti */
7311             gen_jmp_im(s, s->pc - s->cs_base);
7312             gen_eob_inhibit_irq(s, true);
7313         }
7314         break;
7315     case 0x62: /* bound */
7316         if (CODE64(s))
7317             goto illegal_op;
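        /* BOUND checks a signed array index against the lower/upper
           bound pair at the effective address; the helper raises #BR
           when the index is out of range.  */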
7318         ot = dflag;
7319         modrm = x86_ldub_code(env, s);
7320         reg = (modrm >> 3) & 7;
7321         mod = (modrm >> 6) & 3;
7322         if (mod == 3)
7323             goto illegal_op;
7324         gen_op_mov_v_reg(s, ot, s->T0, reg);
7325         gen_lea_modrm(env, s, modrm);
7326         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7327         if (ot == MO_16) {
7328             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7329         } else {
7330             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7331         }
7332         break;
7333     case 0x1c8 ... 0x1cf: /* bswap reg */
7334         reg = (b & 7) | REX_B(s);
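        /* With a 16-bit operand size the result is undefined on real
           hardware; here dflag == MO_16 simply takes the 32-bit path.  */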
7335 #ifdef TARGET_X86_64
7336         if (dflag == MO_64) {
7337             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7338             break;
7339         }
7340 #endif
7341         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7342         break;
7343     case 0xd6: /* salc */
7344         if (CODE64(s))
7345             goto illegal_op;
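        /* Undocumented: AL = CF ? 0xff : 0x00.  Negating the carry bit
           (0 or 1) produces exactly that all-ones/all-zeros mask.  */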
7346         gen_compute_eflags_c(s, s->T0);
7347         tcg_gen_neg_tl(s->T0, s->T0);
7348         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7349         break;
7350     case 0xe0: /* loopnz */
7351     case 0xe1: /* loopz */
7352     case 0xe2: /* loop */
7353     case 0xe3: /* jecxz */
7354         {
7355             TCGLabel *l1, *l2, *l3;
7356 
7357             tval = (int8_t)insn_get(env, s, MO_8);
7358             next_eip = s->pc - s->cs_base;
7359             tval += next_eip;
7360             if (dflag == MO_16) {
7361                 tval &= 0xffff;
7362             }
7363 
7364             l1 = gen_new_label();
7365             l2 = gen_new_label();
7366             l3 = gen_new_label();
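            /* l1: branch taken, jump to tval; l3: loopnz/loopz with ECX
               exhausted, fall through to next_eip; l2: common exit.  */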
7367             gen_update_cc_op(s);
7368             b &= 3;
7369             switch(b) {
7370             case 0: /* loopnz */
7371             case 1: /* loopz */
7372                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7373                 gen_op_jz_ecx(s, s->aflag, l3);
7374                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7375                 break;
7376             case 2: /* loop */
7377                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7378                 gen_op_jnz_ecx(s, s->aflag, l1);
7379                 break;
7380             default:
7381             case 3: /* jcxz */
7382                 gen_op_jz_ecx(s, s->aflag, l1);
7383                 break;
7384             }
7385 
7386             gen_set_label(l3);
7387             gen_jmp_im(s, next_eip);
7388             tcg_gen_br(l2);
7389 
7390             gen_set_label(l1);
7391             gen_jmp_im(s, tval);
7392             gen_set_label(l2);
7393             gen_eob(s);
7394         }
7395         break;
7396     case 0x130: /* wrmsr */
7397     case 0x132: /* rdmsr */
7398         if (check_cpl0(s)) {
7399             gen_update_cc_op(s);
7400             gen_jmp_im(s, pc_start - s->cs_base);
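            /* A wrmsr may change state that affects translation
               (e.g. EFER), so it must end the TB; rdmsr need not.  */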
7401             if (b & 2) {
7402                 gen_helper_rdmsr(cpu_env);
7403             } else {
7404                 gen_helper_wrmsr(cpu_env);
7405                 gen_jmp_im(s, s->pc - s->cs_base);
7406                 gen_eob(s);
7407             }
7408         }
7409         break;
7410     case 0x131: /* rdtsc */
7411         gen_update_cc_op(s);
7412         gen_jmp_im(s, pc_start - s->cs_base);
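        /* With icount, the timer read must sit at the end of the TB:
           open the I/O window here and end the TB just afterwards.  */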
7413         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7414             gen_io_start();
7415         }
7416         gen_helper_rdtsc(cpu_env);
7417         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7418             gen_jmp(s, s->pc - s->cs_base);
7419         }
7420         break;
7421     case 0x133: /* rdpmc */
7422         gen_update_cc_op(s);
7423         gen_jmp_im(s, pc_start - s->cs_base);
7424         gen_helper_rdpmc(cpu_env);
7425         s->base.is_jmp = DISAS_NORETURN;
7426         break;
7427     case 0x134: /* sysenter */
7428         /* On Intel, SYSENTER is also valid in 64-bit mode */
7429         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7430             goto illegal_op;
7431         if (!PE(s)) {
7432             gen_exception_gpf(s);
7433         } else {
7434             gen_helper_sysenter(cpu_env);
7435             gen_eob(s);
7436         }
7437         break;
7438     case 0x135: /* sysexit */
7439         /* On Intel, SYSEXIT is also valid in 64-bit mode */
7440         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7441             goto illegal_op;
7442         if (!PE(s)) {
7443             gen_exception_gpf(s);
7444         } else {
7445             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7446             gen_eob(s);
7447         }
7448         break;
7449 #ifdef TARGET_X86_64
7450     case 0x105: /* syscall */
7451         /* XXX: is it usable in real mode? */
7452         gen_update_cc_op(s);
7453         gen_jmp_im(s, pc_start - s->cs_base);
7454         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7455         /* TF handling for the syscall insn is different: the TF bit is
7456            checked only after the syscall insn completes, so no #DB is
7457            generated once CPL0 has been entered if TF is set in FMASK.  */
7458         gen_eob_worker(s, false, true);
7459         break;
7460     case 0x107: /* sysret */
7461         if (!PE(s)) {
7462             gen_exception_gpf(s);
7463         } else {
7464             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7465             /* condition codes are modified only in long mode */
7466             if (LMA(s)) {
7467                 set_cc_op(s, CC_OP_EFLAGS);
7468             }
7469             /* TF handling for the sysret insn is different. The TF bit is
7470                checked after the sysret insn completes. This allows #DB to be
7471                generated "as if" the syscall insn in userspace has just
7472                completed.  */
7473             gen_eob_worker(s, false, true);
7474         }
7475         break;
7476 #endif
7477     case 0x1a2: /* cpuid */
7478         gen_update_cc_op(s);
7479         gen_jmp_im(s, pc_start - s->cs_base);
7480         gen_helper_cpuid(cpu_env);
7481         break;
7482     case 0xf4: /* hlt */
7483         if (check_cpl0(s)) {
7484             gen_update_cc_op(s);
7485             gen_jmp_im(s, pc_start - s->cs_base);
7486             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7487             s->base.is_jmp = DISAS_NORETURN;
7488         }
7489         break;
7490     case 0x100:
7491         modrm = x86_ldub_code(env, s);
7492         mod = (modrm >> 6) & 3;
7493         op = (modrm >> 3) & 7;
7494         switch(op) {
7495         case 0: /* sldt */
7496             if (!PE(s) || VM86(s))
7497                 goto illegal_op;
7498             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7499                 break;
7500             }
7501             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7502             tcg_gen_ld32u_tl(s->T0, cpu_env,
7503                              offsetof(CPUX86State, ldt.selector));
7504             ot = mod == 3 ? dflag : MO_16;
7505             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7506             break;
7507         case 2: /* lldt */
7508             if (!PE(s) || VM86(s))
7509                 goto illegal_op;
7510             if (check_cpl0(s)) {
7511                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7512                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7513                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7514                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7515             }
7516             break;
7517         case 1: /* str */
7518             if (!PE(s) || VM86(s))
7519                 goto illegal_op;
7520             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7521                 break;
7522             }
7523             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7524             tcg_gen_ld32u_tl(s->T0, cpu_env,
7525                              offsetof(CPUX86State, tr.selector));
7526             ot = mod == 3 ? dflag : MO_16;
7527             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7528             break;
7529         case 3: /* ltr */
7530             if (!PE(s) || VM86(s))
7531                 goto illegal_op;
7532             if (check_cpl0(s)) {
7533                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7534                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7535                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7536                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7537             }
7538             break;
7539         case 4: /* verr */
7540         case 5: /* verw */
7541             if (!PE(s) || VM86(s))
7542                 goto illegal_op;
7543             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7544             gen_update_cc_op(s);
7545             if (op == 4) {
7546                 gen_helper_verr(cpu_env, s->T0);
7547             } else {
7548                 gen_helper_verw(cpu_env, s->T0);
7549             }
7550             set_cc_op(s, CC_OP_EFLAGS);
7551             break;
7552         default:
7553             goto unknown_op;
7554         }
7555         break;
7556 
7557     case 0x101:
7558         modrm = x86_ldub_code(env, s);
7559         switch (modrm) {
7560         CASE_MODRM_MEM_OP(0): /* sgdt */
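            /* Store the pseudo-descriptor: 16-bit limit first, then the
               base (only 24 base bits with a 16-bit operand size).  */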
7561             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7562                 break;
7563             }
7564             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7565             gen_lea_modrm(env, s, modrm);
7566             tcg_gen_ld32u_tl(s->T0,
7567                              cpu_env, offsetof(CPUX86State, gdt.limit));
7568             gen_op_st_v(s, MO_16, s->T0, s->A0);
7569             gen_add_A0_im(s, 2);
7570             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7571             if (dflag == MO_16) {
7572                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7573             }
7574             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7575             break;
7576 
7577         case 0xc8: /* monitor */
7578             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7579                 goto illegal_op;
7580             }
7581             gen_update_cc_op(s);
7582             gen_jmp_im(s, pc_start - s->cs_base);
7583             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7584             gen_extu(s->aflag, s->A0);
7585             gen_add_A0_ds_seg(s);
7586             gen_helper_monitor(cpu_env, s->A0);
7587             break;
7588 
7589         case 0xc9: /* mwait */
7590             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7591                 goto illegal_op;
7592             }
7593             gen_update_cc_op(s);
7594             gen_jmp_im(s, pc_start - s->cs_base);
7595             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7596             s->base.is_jmp = DISAS_NORETURN;
7597             break;
7598 
7599         case 0xca: /* clac */
7600             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7601                 || CPL(s) != 0) {
7602                 goto illegal_op;
7603             }
7604             gen_helper_clac(cpu_env);
7605             gen_jmp_im(s, s->pc - s->cs_base);
7606             gen_eob(s);
7607             break;
7608 
7609         case 0xcb: /* stac */
7610             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7611                 || CPL(s) != 0) {
7612                 goto illegal_op;
7613             }
7614             gen_helper_stac(cpu_env);
7615             gen_jmp_im(s, s->pc - s->cs_base);
7616             gen_eob(s);
7617             break;
7618 
7619         CASE_MODRM_MEM_OP(1): /* sidt */
7620             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7621                 break;
7622             }
7623             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7624             gen_lea_modrm(env, s, modrm);
7625             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7626             gen_op_st_v(s, MO_16, s->T0, s->A0);
7627             gen_add_A0_im(s, 2);
7628             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7629             if (dflag == MO_16) {
7630                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7631             }
7632             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7633             break;
7634 
7635         case 0xd0: /* xgetbv */
7636             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7637                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7638                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7639                 goto illegal_op;
7640             }
7641             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7642             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7643             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7644             break;
7645 
7646         case 0xd1: /* xsetbv */
7647             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7648                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7649                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7650                 goto illegal_op;
7651             }
7652             if (!check_cpl0(s)) {
7653                 break;
7654             }
7655             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7656                                   cpu_regs[R_EDX]);
7657             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7658             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7659             /* End TB because translation flags may change.  */
7660             gen_jmp_im(s, s->pc - s->cs_base);
7661             gen_eob(s);
7662             break;
7663 
7664         case 0xd8: /* VMRUN */
7665             if (!SVME(s) || !PE(s)) {
7666                 goto illegal_op;
7667             }
7668             if (!check_cpl0(s)) {
7669                 break;
7670             }
7671             gen_update_cc_op(s);
7672             gen_jmp_im(s, pc_start - s->cs_base);
7673             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7674                              tcg_const_i32(s->pc - pc_start));
7675             tcg_gen_exit_tb(NULL, 0);
7676             s->base.is_jmp = DISAS_NORETURN;
7677             break;
7678 
7679         case 0xd9: /* VMMCALL */
7680             if (!SVME(s)) {
7681                 goto illegal_op;
7682             }
7683             gen_update_cc_op(s);
7684             gen_jmp_im(s, pc_start - s->cs_base);
7685             gen_helper_vmmcall(cpu_env);
7686             break;
7687 
7688         case 0xda: /* VMLOAD */
7689             if (!SVME(s) || !PE(s)) {
7690                 goto illegal_op;
7691             }
7692             if (!check_cpl0(s)) {
7693                 break;
7694             }
7695             gen_update_cc_op(s);
7696             gen_jmp_im(s, pc_start - s->cs_base);
7697             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7698             break;
7699 
7700         case 0xdb: /* VMSAVE */
7701             if (!SVME(s) || !PE(s)) {
7702                 goto illegal_op;
7703             }
7704             if (!check_cpl0(s)) {
7705                 break;
7706             }
7707             gen_update_cc_op(s);
7708             gen_jmp_im(s, pc_start - s->cs_base);
7709             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7710             break;
7711 
7712         case 0xdc: /* STGI */
7713             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7714                 || !PE(s)) {
7715                 goto illegal_op;
7716             }
7717             if (!check_cpl0(s)) {
7718                 break;
7719             }
7720             gen_update_cc_op(s);
7721             gen_helper_stgi(cpu_env);
7722             gen_jmp_im(s, s->pc - s->cs_base);
7723             gen_eob(s);
7724             break;
7725 
7726         case 0xdd: /* CLGI */
7727             if (!SVME(s) || !PE(s)) {
7728                 goto illegal_op;
7729             }
7730             if (!check_cpl0(s)) {
7731                 break;
7732             }
7733             gen_update_cc_op(s);
7734             gen_jmp_im(s, pc_start - s->cs_base);
7735             gen_helper_clgi(cpu_env);
7736             break;
7737 
7738         case 0xde: /* SKINIT */
7739             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7740                 || !PE(s)) {
7741                 goto illegal_op;
7742             }
7743             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7744             /* If not intercepted, not implemented -- raise #UD. */
7745             goto illegal_op;
7746 
7747         case 0xdf: /* INVLPGA */
7748             if (!SVME(s) || !PE(s)) {
7749                 goto illegal_op;
7750             }
7751             if (!check_cpl0(s)) {
7752                 break;
7753             }
7754             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7755             if (s->aflag == MO_64) {
7756                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7757             } else {
7758                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7759             }
7760             gen_helper_flush_page(cpu_env, s->A0);
7761             gen_jmp_im(s, s->pc - s->cs_base);
7762             gen_eob(s);
7763             break;
7764 
7765         CASE_MODRM_MEM_OP(2): /* lgdt */
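            /* Load 16-bit limit, then base, mirroring sgdt above.  */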
7766             if (!check_cpl0(s)) {
7767                 break;
7768             }
7769             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7770             gen_lea_modrm(env, s, modrm);
7771             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7772             gen_add_A0_im(s, 2);
7773             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7774             if (dflag == MO_16) {
7775                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7776             }
7777             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7778             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7779             break;
7780 
7781         CASE_MODRM_MEM_OP(3): /* lidt */
7782             if (!check_cpl0(s)) {
7783                 break;
7784             }
7785             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7786             gen_lea_modrm(env, s, modrm);
7787             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7788             gen_add_A0_im(s, 2);
7789             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7790             if (dflag == MO_16) {
7791                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7792             }
7793             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7794             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7795             break;
7796 
7797         CASE_MODRM_OP(4): /* smsw */
7798             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7799                 break;
7800             }
7801             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7802             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7803             /*
7804              * In 32-bit mode, the higher 16 bits of the destination
7805              * register are undefined.  In practice CR0[31:0] is stored
7806              * just like in 64-bit mode.
7807              */
7808             mod = (modrm >> 6) & 3;
7809             ot = (mod != 3 ? MO_16 : s->dflag);
7810             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7811             break;
7812         case 0xee: /* rdpkru */
7813             if (prefixes & PREFIX_LOCK) {
7814                 goto illegal_op;
7815             }
7816             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7817             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7818             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7819             break;
7820         case 0xef: /* wrpkru */
7821             if (prefixes & PREFIX_LOCK) {
7822                 goto illegal_op;
7823             }
7824             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7825                                   cpu_regs[R_EDX]);
7826             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7827             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7828             break;
7829 
7830         CASE_MODRM_OP(6): /* lmsw */
7831             if (!check_cpl0(s)) {
7832                 break;
7833             }
7834             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7835             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7836             /*
7837              * Only the 4 lower bits of CR0 are modified.
7838              * PE cannot be set to zero if already set to one.
7839              */
7840             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7841             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7842             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7843             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7844             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7845             gen_jmp_im(s, s->pc - s->cs_base);
7846             gen_eob(s);
7847             break;
7848 
7849         CASE_MODRM_MEM_OP(7): /* invlpg */
7850             if (!check_cpl0(s)) {
7851                 break;
7852             }
7853             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7854             gen_lea_modrm(env, s, modrm);
7855             gen_helper_flush_page(cpu_env, s->A0);
7856             gen_jmp_im(s, s->pc - s->cs_base);
7857             gen_eob(s);
7858             break;
7859 
7860         case 0xf8: /* swapgs */
7861 #ifdef TARGET_X86_64
7862             if (CODE64(s)) {
7863                 if (check_cpl0(s)) {
7864                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7865                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7866                                   offsetof(CPUX86State, kernelgsbase));
7867                     tcg_gen_st_tl(s->T0, cpu_env,
7868                                   offsetof(CPUX86State, kernelgsbase));
7869                 }
7870                 break;
7871             }
7872 #endif
7873             goto illegal_op;
7874 
7875         case 0xf9: /* rdtscp */
7876             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7877                 goto illegal_op;
7878             }
7879             gen_update_cc_op(s);
7880             gen_jmp_im(s, pc_start - s->cs_base);
7881             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7882                 gen_io_start();
7883             }
7884             gen_helper_rdtscp(cpu_env);
7885             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7886                 gen_jmp(s, s->pc - s->cs_base);
7887             }
7888             break;
7889 
7890         default:
7891             goto unknown_op;
7892         }
7893         break;
7894 
7895     case 0x108: /* invd */
7896     case 0x109: /* wbinvd */
7897         if (check_cpl0(s)) {
7898             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7899             /* nothing to do */
7900         }
7901         break;
7902     case 0x63: /* arpl or movslS (x86_64) */
7903 #ifdef TARGET_X86_64
7904         if (CODE64(s)) {
7905             int d_ot;
7906             /* d_ot is the size of the destination */
7907             d_ot = dflag;
7908 
7909             modrm = x86_ldub_code(env, s);
7910             reg = ((modrm >> 3) & 7) | REX_R(s);
7911             mod = (modrm >> 6) & 3;
7912             rm = (modrm & 7) | REX_B(s);
7913 
7914             if (mod == 3) {
7915                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7916                 /* sign extend */
7917                 if (d_ot == MO_64) {
7918                     tcg_gen_ext32s_tl(s->T0, s->T0);
7919                 }
7920                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7921             } else {
7922                 gen_lea_modrm(env, s, modrm);
7923                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7924                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7925             }
7926         } else
7927 #endif
7928         {
7929             TCGLabel *label1;
7930             TCGv t0, t1, t2, a0;
7931 
7932             if (!PE(s) || VM86(s))
7933                 goto illegal_op;
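            /* ARPL: if the destination selector's RPL (bits 1:0) is below
               the source's, raise it to the source RPL and set ZF,
               otherwise clear ZF.  t2 holds the CC_Z mask ORed into
               cc_src at the end.  */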
7934             t0 = tcg_temp_local_new();
7935             t1 = tcg_temp_local_new();
7936             t2 = tcg_temp_local_new();
7937             ot = MO_16;
7938             modrm = x86_ldub_code(env, s);
7939             reg = (modrm >> 3) & 7;
7940             mod = (modrm >> 6) & 3;
7941             rm = modrm & 7;
7942             if (mod != 3) {
7943                 gen_lea_modrm(env, s, modrm);
7944                 gen_op_ld_v(s, ot, t0, s->A0);
7945                 a0 = tcg_temp_local_new();
7946                 tcg_gen_mov_tl(a0, s->A0);
7947             } else {
7948                 gen_op_mov_v_reg(s, ot, t0, rm);
7949                 a0 = NULL;
7950             }
7951             gen_op_mov_v_reg(s, ot, t1, reg);
7952             tcg_gen_andi_tl(s->tmp0, t0, 3);
7953             tcg_gen_andi_tl(t1, t1, 3);
7954             tcg_gen_movi_tl(t2, 0);
7955             label1 = gen_new_label();
7956             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7957             tcg_gen_andi_tl(t0, t0, ~3);
7958             tcg_gen_or_tl(t0, t0, t1);
7959             tcg_gen_movi_tl(t2, CC_Z);
7960             gen_set_label(label1);
7961             if (mod != 3) {
7962                 gen_op_st_v(s, ot, t0, a0);
7963                 tcg_temp_free(a0);
7964             } else {
7965                 gen_op_mov_reg_v(s, ot, rm, t0);
7966             }
7967             gen_compute_eflags(s);
7968             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7969             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7970             tcg_temp_free(t0);
7971             tcg_temp_free(t1);
7972             tcg_temp_free(t2);
7973         }
7974         break;
7975     case 0x102: /* lar */
7976     case 0x103: /* lsl */
7977         {
7978             TCGLabel *label1;
7979             TCGv t0;
7980             if (!PE(s) || VM86(s))
7981                 goto illegal_op;
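            /* The helpers set ZF in cc_src on success; the destination is
               written only when ZF is set, so on failure the register is
               left unchanged, as on real hardware.  */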
7982             ot = dflag != MO_16 ? MO_32 : MO_16;
7983             modrm = x86_ldub_code(env, s);
7984             reg = ((modrm >> 3) & 7) | REX_R(s);
7985             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7986             t0 = tcg_temp_local_new();
7987             gen_update_cc_op(s);
7988             if (b == 0x102) {
7989                 gen_helper_lar(t0, cpu_env, s->T0);
7990             } else {
7991                 gen_helper_lsl(t0, cpu_env, s->T0);
7992             }
7993             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7994             label1 = gen_new_label();
7995             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7996             gen_op_mov_reg_v(s, ot, reg, t0);
7997             gen_set_label(label1);
7998             set_cc_op(s, CC_OP_EFLAGS);
7999             tcg_temp_free(t0);
8000         }
8001         break;
8002     case 0x118:
8003         modrm = x86_ldub_code(env, s);
8004         mod = (modrm >> 6) & 3;
8005         op = (modrm >> 3) & 7;
8006         switch(op) {
8007         case 0: /* prefetchnta */
8008         case 1: /* prefetcht0 */
8009         case 2: /* prefetcht1 */
8010         case 3: /* prefetcht2 */
8011             if (mod == 3)
8012                 goto illegal_op;
8013             gen_nop_modrm(env, s, modrm);
8014             /* nothing more to do */
8015             break;
8016         default: /* nop (multi byte) */
8017             gen_nop_modrm(env, s, modrm);
8018             break;
8019         }
8020         break;
8021     case 0x11a:
8022         modrm = x86_ldub_code(env, s);
8023         if (s->flags & HF_MPX_EN_MASK) {
8024             mod = (modrm >> 6) & 3;
8025             reg = ((modrm >> 3) & 7) | REX_R(s);
8026             if (prefixes & PREFIX_REPZ) {
8027                 /* bndcl */
8028                 if (reg >= 4
8029                     || (prefixes & PREFIX_LOCK)
8030                     || s->aflag == MO_16) {
8031                     goto illegal_op;
8032                 }
8033                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
8034             } else if (prefixes & PREFIX_REPNZ) {
8035                 /* bndcu */
8036                 if (reg >= 4
8037                     || (prefixes & PREFIX_LOCK)
8038                     || s->aflag == MO_16) {
8039                     goto illegal_op;
8040                 }
8041                 TCGv_i64 notu = tcg_temp_new_i64();
8042                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
8043                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
8044                 tcg_temp_free_i64(notu);
8045             } else if (prefixes & PREFIX_DATA) {
8046                 /* bndmov -- from reg/mem */
8047                 if (reg >= 4 || s->aflag == MO_16) {
8048                     goto illegal_op;
8049                 }
8050                 if (mod == 3) {
8051                     int reg2 = (modrm & 7) | REX_B(s);
8052                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8053                         goto illegal_op;
8054                     }
8055                     if (s->flags & HF_MPX_IU_MASK) {
8056                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
8057                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
8058                     }
8059                 } else {
8060                     gen_lea_modrm(env, s, modrm);
8061                     if (CODE64(s)) {
8062                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
8063                                             s->mem_index, MO_LEUQ);
8064                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8065                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
8066                                             s->mem_index, MO_LEUQ);
8067                     } else {
8068                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
8069                                             s->mem_index, MO_LEUL);
8070                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8071                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
8072                                             s->mem_index, MO_LEUL);
8073                     }
8074                     /* bnd registers are now in use */
8075                     gen_set_hflag(s, HF_MPX_IU_MASK);
8076                 }
8077             } else if (mod != 3) {
8078                 /* bndldx */
8079                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8080                 if (reg >= 4
8081                     || (prefixes & PREFIX_LOCK)
8082                     || s->aflag == MO_16
8083                     || a.base < -1) {
8084                     goto illegal_op;
8085                 }
8086                 if (a.base >= 0) {
8087                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8088                 } else {
8089                     tcg_gen_movi_tl(s->A0, 0);
8090                 }
8091                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8092                 if (a.index >= 0) {
8093                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8094                 } else {
8095                     tcg_gen_movi_tl(s->T0, 0);
8096                 }
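                /* The helper takes the address (A0) and the pointer value
                   from the index register (T0) separately; the MPX
                   bound-table lookup uses both.  */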
8097                 if (CODE64(s)) {
8098                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
8099                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
8100                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
8101                 } else {
8102                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
8103                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
8104                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
8105                 }
8106                 gen_set_hflag(s, HF_MPX_IU_MASK);
8107             }
8108         }
8109         gen_nop_modrm(env, s, modrm);
8110         break;
8111     case 0x11b:
8112         modrm = x86_ldub_code(env, s);
8113         if (s->flags & HF_MPX_EN_MASK) {
8114             mod = (modrm >> 6) & 3;
8115             reg = ((modrm >> 3) & 7) | REX_R(s);
8116             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
8117                 /* bndmk */
8118                 if (reg >= 4
8119                     || (prefixes & PREFIX_LOCK)
8120                     || s->aflag == MO_16) {
8121                     goto illegal_op;
8122                 }
8123                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8124                 if (a.base >= 0) {
8125                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8126                     if (!CODE64(s)) {
8127                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8128                     }
8129                 } else if (a.base == -1) {
8130                     /* no base register: the lower bound is 0 */
8131                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8132                 } else {
8133                     /* rip-relative generates #ud */
8134                     goto illegal_op;
8135                 }
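                /* The upper bound is kept in one's-complement form, as
                   BNDMK produces it: bndcn compares the raw value directly,
                   while bndcu (0f 1a above) re-complements it first.  */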
8136                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8137                 if (!CODE64(s)) {
8138                     tcg_gen_ext32u_tl(s->A0, s->A0);
8139                 }
8140                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8141                 /* bnd registers are now in use */
8142                 gen_set_hflag(s, HF_MPX_IU_MASK);
8143                 break;
8144             } else if (prefixes & PREFIX_REPNZ) {
8145                 /* bndcn */
8146                 if (reg >= 4
8147                     || (prefixes & PREFIX_LOCK)
8148                     || s->aflag == MO_16) {
8149                     goto illegal_op;
8150                 }
8151                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8152             } else if (prefixes & PREFIX_DATA) {
8153                 /* bndmov -- to reg/mem */
8154                 if (reg >= 4 || s->aflag == MO_16) {
8155                     goto illegal_op;
8156                 }
8157                 if (mod == 3) {
8158                     int reg2 = (modrm & 7) | REX_B(s);
8159                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8160                         goto illegal_op;
8161                     }
8162                     if (s->flags & HF_MPX_IU_MASK) {
8163                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8164                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8165                     }
8166                 } else {
8167                     gen_lea_modrm(env, s, modrm);
8168                     if (CODE64(s)) {
8169                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8170                                             s->mem_index, MO_LEUQ);
8171                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8172                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8173                                             s->mem_index, MO_LEUQ);
8174                     } else {
8175                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8176                                             s->mem_index, MO_LEUL);
8177                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8178                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8179                                             s->mem_index, MO_LEUL);
8180                     }
8181                 }
8182             } else if (mod != 3) {
8183                 /* bndstx */
8184                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8185                 if (reg >= 4
8186                     || (prefixes & PREFIX_LOCK)
8187                     || s->aflag == MO_16
8188                     || a.base < -1) {
8189                     goto illegal_op;
8190                 }
8191                 if (a.base >= 0) {
8192                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8193                 } else {
8194                     tcg_gen_movi_tl(s->A0, 0);
8195                 }
8196                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8197                 if (a.index >= 0) {
8198                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8199                 } else {
8200                     tcg_gen_movi_tl(s->T0, 0);
8201                 }
8202                 if (CODE64(s)) {
8203                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8204                                         cpu_bndl[reg], cpu_bndu[reg]);
8205                 } else {
8206                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8207                                         cpu_bndl[reg], cpu_bndu[reg]);
8208                 }
8209             }
8210         }
8211         gen_nop_modrm(env, s, modrm);
8212         break;
8213     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8214         modrm = x86_ldub_code(env, s);
8215         gen_nop_modrm(env, s, modrm);
8216         break;
8217 
8218     case 0x120: /* mov reg, crN */
8219     case 0x122: /* mov crN, reg */
8220         if (!check_cpl0(s)) {
8221             break;
8222         }
8223         modrm = x86_ldub_code(env, s);
8224         /*
8225          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8226          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8227          * processors all show that the mod bits are assumed to be 1's,
8228          * regardless of actual values.
8229          */
8230         rm = (modrm & 7) | REX_B(s);
8231         reg = ((modrm >> 3) & 7) | REX_R(s);
8232         switch (reg) {
8233         case 0:
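            /* With a LOCK prefix and AMD's CR8 legacy feature, a mov
               to/from CR0 encodes an access to CR8 instead.  */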
8234             if ((prefixes & PREFIX_LOCK) &&
8235                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8236                 reg = 8;
8237             }
8238             break;
8239         case 2:
8240         case 3:
8241         case 4:
8242         case 8:
8243             break;
8244         default:
8245             goto unknown_op;
8246         }
8247         ot = (CODE64(s) ? MO_64 : MO_32);
8248 
8249         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8250             gen_io_start();
8251         }
8252         if (b & 2) {
8253             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8254             gen_op_mov_v_reg(s, ot, s->T0, rm);
8255             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8256             gen_jmp_im(s, s->pc - s->cs_base);
8257             gen_eob(s);
8258         } else {
8259             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8260             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8261             gen_op_mov_reg_v(s, ot, rm, s->T0);
8262             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8263                 gen_jmp(s, s->pc - s->cs_base);
8264             }
8265         }
8266         break;
8267 
8268     case 0x121: /* mov reg, drN */
8269     case 0x123: /* mov drN, reg */
8270         if (check_cpl0(s)) {
8271             modrm = x86_ldub_code(env, s);
8272             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8273              * AMD documentation (24594.pdf) and testing of
8274              * Intel 386 and 486 processors all show that the mod bits
8275              * are assumed to be 1's, regardless of actual values.
8276              */
8277             rm = (modrm & 7) | REX_B(s);
8278             reg = ((modrm >> 3) & 7) | REX_R(s);
8279             if (CODE64(s))
8280                 ot = MO_64;
8281             else
8282                 ot = MO_32;
8283             if (reg >= 8) {
8284                 goto illegal_op;
8285             }
8286             if (b & 2) {
8287                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8288                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8289                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8290                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8291                 gen_jmp_im(s, s->pc - s->cs_base);
8292                 gen_eob(s);
8293             } else {
8294                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8295                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8296                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8297                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8298             }
8299         }
8300         break;
8301     case 0x106: /* clts */
8302         if (check_cpl0(s)) {
8303             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8304             gen_helper_clts(cpu_env);
8305             /* abort block because static cpu state changed */
8306             gen_jmp_im(s, s->pc - s->cs_base);
8307             gen_eob(s);
8308         }
8309         break;
8310     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8311     case 0x1c3: /* MOVNTI reg, mem */
8312         if (!(s->cpuid_features & CPUID_SSE2))
8313             goto illegal_op;
8314         ot = mo_64_32(dflag);
8315         modrm = x86_ldub_code(env, s);
8316         mod = (modrm >> 6) & 3;
8317         if (mod == 3)
8318             goto illegal_op;
8319         reg = ((modrm >> 3) & 7) | REX_R(s);
8320         /* generate a generic store */
8321         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8322         break;
8323     case 0x1ae:
8324         modrm = x86_ldub_code(env, s);
8325         switch (modrm) {
8326         CASE_MODRM_MEM_OP(0): /* fxsave */
8327             if (!(s->cpuid_features & CPUID_FXSR)
8328                 || (prefixes & PREFIX_LOCK)) {
8329                 goto illegal_op;
8330             }
8331             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8332                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8333                 break;
8334             }
8335             gen_lea_modrm(env, s, modrm);
8336             gen_helper_fxsave(cpu_env, s->A0);
8337             break;
8338 
8339         CASE_MODRM_MEM_OP(1): /* fxrstor */
8340             if (!(s->cpuid_features & CPUID_FXSR)
8341                 || (prefixes & PREFIX_LOCK)) {
8342                 goto illegal_op;
8343             }
8344             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8345                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8346                 break;
8347             }
8348             gen_lea_modrm(env, s, modrm);
8349             gen_helper_fxrstor(cpu_env, s->A0);
8350             break;
8351 
8352         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8353             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8354                 goto illegal_op;
8355             }
8356             if (s->flags & HF_TS_MASK) {
8357                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8358                 break;
8359             }
8360             gen_lea_modrm(env, s, modrm);
8361             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8362             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8363             break;
8364 
8365         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8366             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8367                 goto illegal_op;
8368             }
8369             if (s->flags & HF_TS_MASK) {
8370                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8371                 break;
8372             }
8373             gen_helper_update_mxcsr(cpu_env);
8374             gen_lea_modrm(env, s, modrm);
8375             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8376             gen_op_st_v(s, MO_32, s->T0, s->A0);
8377             break;
8378 
8379         CASE_MODRM_MEM_OP(4): /* xsave */
8380             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8381                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8382                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8383                 goto illegal_op;
8384             }
8385             gen_lea_modrm(env, s, modrm);
8386             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8387                                   cpu_regs[R_EDX]);
8388             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8389             break;
8390 
8391         CASE_MODRM_MEM_OP(5): /* xrstor */
8392             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8393                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8394                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8395                 goto illegal_op;
8396             }
8397             gen_lea_modrm(env, s, modrm);
8398             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8399                                   cpu_regs[R_EDX]);
8400             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8401             /* XRSTOR is how MPX is enabled, which changes how
8402                we translate.  Thus we need to end the TB.  */
8403             gen_update_cc_op(s);
8404             gen_jmp_im(s, s->pc - s->cs_base);
8405             gen_eob(s);
8406             break;
8407 
8408         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8409             if (prefixes & PREFIX_LOCK) {
8410                 goto illegal_op;
8411             }
8412             if (prefixes & PREFIX_DATA) {
8413                 /* clwb */
8414                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8415                     goto illegal_op;
8416                 }
8417                 gen_nop_modrm(env, s, modrm);
8418             } else {
8419                 /* xsaveopt */
8420                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8421                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8422                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8423                     goto illegal_op;
8424                 }
8425                 gen_lea_modrm(env, s, modrm);
8426                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8427                                       cpu_regs[R_EDX]);
8428                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8429             }
8430             break;
8431 
8432         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8433             if (prefixes & PREFIX_LOCK) {
8434                 goto illegal_op;
8435             }
8436             if (prefixes & PREFIX_DATA) {
8437                 /* clflushopt */
8438                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8439                     goto illegal_op;
8440                 }
8441             } else {
8442                 /* clflush */
8443                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8444                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8445                     goto illegal_op;
8446                 }
8447             }
8448             gen_nop_modrm(env, s, modrm);
8449             break;
8450 
8451         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8452         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8453         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8454         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8455             if (CODE64(s)
8456                 && (prefixes & PREFIX_REPZ)
8457                 && !(prefixes & PREFIX_LOCK)
8458                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8459                 TCGv base, treg, src, dst;
8460 
8461                 /* Preserve hflags bits by testing CR4 at runtime.  */
8462                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8463                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8464 
8465                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8466                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8467 
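                /* modrm bit 3 selects FS vs GS; bit 4 selects write
                   (wr*base) vs read (rd*base).  */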
8468                 if (modrm & 0x10) {
8469                     /* wr*base */
8470                     dst = base, src = treg;
8471                 } else {
8472                     /* rd*base */
8473                     dst = treg, src = base;
8474                 }
8475 
8476                 if (s->dflag == MO_32) {
8477                     tcg_gen_ext32u_tl(dst, src);
8478                 } else {
8479                     tcg_gen_mov_tl(dst, src);
8480                 }
8481                 break;
8482             }
8483             goto unknown_op;
8484 
8485         case 0xf8: /* sfence / pcommit */
8486             if (prefixes & PREFIX_DATA) {
8487                 /* pcommit */
8488                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8489                     || (prefixes & PREFIX_LOCK)) {
8490                     goto illegal_op;
8491                 }
8492                 break;
8493             }
8494             /* fallthru */
8495         case 0xf9 ... 0xff: /* sfence */
8496             if (!(s->cpuid_features & CPUID_SSE)
8497                 || (prefixes & PREFIX_LOCK)) {
8498                 goto illegal_op;
8499             }
8500             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8501             break;
8502         case 0xe8 ... 0xef: /* lfence */
8503             if (!(s->cpuid_features & CPUID_SSE)
8504                 || (prefixes & PREFIX_LOCK)) {
8505                 goto illegal_op;
8506             }
8507             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8508             break;
8509         case 0xf0 ... 0xf7: /* mfence */
8510             if (!(s->cpuid_features & CPUID_SSE2)
8511                 || (prefixes & PREFIX_LOCK)) {
8512                 goto illegal_op;
8513             }
8514             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8515             break;
8516 
8517         default:
8518             goto unknown_op;
8519         }
8520         break;
8521 
8522     case 0x10d: /* 3DNow! prefetch(w) */
8523         modrm = x86_ldub_code(env, s);
8524         mod = (modrm >> 6) & 3;
8525         if (mod == 3)
8526             goto illegal_op;
8527         gen_nop_modrm(env, s, modrm);
8528         break;
8529     case 0x1aa: /* rsm */
8530         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8531         if (!(s->flags & HF_SMM_MASK))
8532             goto illegal_op;
8533 #ifdef CONFIG_USER_ONLY
8534         /* we should not be in SMM mode */
8535         g_assert_not_reached();
8536 #else
8537         gen_update_cc_op(s);
8538         gen_jmp_im(s, s->pc - s->cs_base);
8539         gen_helper_rsm(cpu_env);
8540 #endif /* CONFIG_USER_ONLY */
8541         gen_eob(s);
8542         break;
8543     case 0x1b8: /* SSE4.2 popcnt */
8544         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8545              PREFIX_REPZ)
8546             goto illegal_op;
8547         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8548             goto illegal_op;
8549 
8550         modrm = x86_ldub_code(env, s);
8551         reg = ((modrm >> 3) & 7) | REX_R(s);
8552 
8553         if (s->prefix & PREFIX_DATA) {
8554             ot = MO_16;
8555         } else {
8556             ot = mo_64_32(dflag);
8557         }
8558 
8559         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8560         gen_extu(ot, s->T0);
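        /* cc_src keeps the zero-extended source: with CC_OP_POPCNT only
           ZF is defined, set iff the source was zero; the other flags
           are cleared.  */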
8561         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8562         tcg_gen_ctpop_tl(s->T0, s->T0);
8563         gen_op_mov_reg_v(s, ot, reg, s->T0);
8564 
8565         set_cc_op(s, CC_OP_POPCNT);
8566         break;
8567     case 0x10e ... 0x10f:
8568         /* 3DNow! instructions, ignore prefixes */
8569         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8570         /* fall through */
8571     case 0x110 ... 0x117:
8572     case 0x128 ... 0x12f:
8573     case 0x138 ... 0x13a:
8574     case 0x150 ... 0x179:
8575     case 0x17c ... 0x17f:
8576     case 0x1c2:
8577     case 0x1c4 ... 0x1c6:
8578     case 0x1d0 ... 0x1fe:
8579         gen_sse(env, s, b, pc_start);
8580         break;
8581     default:
8582         goto unknown_op;
8583     }
8584     return s->pc;
8585  illegal_op:
8586     gen_illegal_opcode(s);
8587     return s->pc;
8588  unknown_op:
8589     gen_unknown_opcode(env, s);
8590     return s->pc;
8591 }
8592 
8593 void tcg_x86_init(void)
8594 {
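    /* Register the fixed TCG globals that mirror CPUX86State fields;
       generated code accesses guest registers through them.  */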
8595     static const char reg_names[CPU_NB_REGS][4] = {
8596 #ifdef TARGET_X86_64
8597         [R_EAX] = "rax",
8598         [R_EBX] = "rbx",
8599         [R_ECX] = "rcx",
8600         [R_EDX] = "rdx",
8601         [R_ESI] = "rsi",
8602         [R_EDI] = "rdi",
8603         [R_EBP] = "rbp",
8604         [R_ESP] = "rsp",
8605         [8]  = "r8",
8606         [9]  = "r9",
8607         [10] = "r10",
8608         [11] = "r11",
8609         [12] = "r12",
8610         [13] = "r13",
8611         [14] = "r14",
8612         [15] = "r15",
8613 #else
8614         [R_EAX] = "eax",
8615         [R_EBX] = "ebx",
8616         [R_ECX] = "ecx",
8617         [R_EDX] = "edx",
8618         [R_ESI] = "esi",
8619         [R_EDI] = "edi",
8620         [R_EBP] = "ebp",
8621         [R_ESP] = "esp",
8622 #endif
8623     };
8624     static const char seg_base_names[6][8] = {
8625         [R_CS] = "cs_base",
8626         [R_DS] = "ds_base",
8627         [R_ES] = "es_base",
8628         [R_FS] = "fs_base",
8629         [R_GS] = "gs_base",
8630         [R_SS] = "ss_base",
8631     };
8632     static const char bnd_regl_names[4][8] = {
8633         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8634     };
8635     static const char bnd_regu_names[4][8] = {
8636         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8637     };
8638     int i;
8639 
8640     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8641                                        offsetof(CPUX86State, cc_op), "cc_op");
8642     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8643                                     "cc_dst");
8644     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8645                                     "cc_src");
8646     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8647                                      "cc_src2");
8648 
8649     for (i = 0; i < CPU_NB_REGS; ++i) {
8650         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8651                                          offsetof(CPUX86State, regs[i]),
8652                                          reg_names[i]);
8653     }
8654 
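    /* One base address per segment register (ES, CS, SS, DS, FS, GS). */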
    for (i = 0; i < 6; ++i) {
        cpu_seg_base[i]
            = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUX86State, segs[i].base),
                                 seg_base_names[i]);
    }

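    /* MPX bound registers BND0-BND3: 64-bit lower and upper bounds. */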
    for (i = 0; i < 4; ++i) {
        cpu_bndl[i]
            = tcg_global_mem_new_i64(cpu_env,
                                     offsetof(CPUX86State, bnd_regs[i].lb),
                                     bnd_regl_names[i]);
        cpu_bndu[i]
            = tcg_global_mem_new_i64(cpu_env,
                                     offsetof(CPUX86State, bnd_regs[i].ub),
                                     bnd_regu_names[i]);
    }
}

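/*
 * Per-translation-block setup: cache the TB flags and CPUID feature
 * words the decoder consults into the DisasContext, and allocate the
 * TCG temporaries used while translating.
 */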
static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUX86State *env = cpu->env_ptr;
    uint32_t flags = dc->base.tb->flags;
    uint32_t cflags = tb_cflags(dc->base.tb);
    int cpl = (flags >> HF_CPL_SHIFT) & 3;
    int iopl = (flags >> IOPL_SHIFT) & 3;

    dc->cs_base = dc->base.tb->cs_base;
    dc->flags = flags;
#ifndef CONFIG_USER_ONLY
    dc->cpl = cpl;
    dc->iopl = iopl;
#endif

    /* We make some simplifying assumptions; validate they're correct. */
    g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
    g_assert(CPL(dc) == cpl);
    g_assert(IOPL(dc) == iopl);
    g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
    g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
    g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
    g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
    g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
    g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
    g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
    g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));

    dc->cc_op = CC_OP_DYNAMIC;
    dc->cc_op_dirty = false;
    dc->popl_esp_hack = 0;
    /* Select the MMU index used for guest memory accesses. */
    dc->mem_index = 0;
#ifdef CONFIG_SOFTMMU
    dc->mem_index = cpu_mmu_index(env, false);
#endif
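    /* Cache the CPUID feature words consulted during instruction decode. */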
    dc->cpuid_features = env->features[FEAT_1_EDX];
    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
    dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
                    (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
    /*
     * If jmp_opt is set, we want to handle each string instruction
     * individually.  With icount, also disable the repz optimization so
     * that each iteration is accounted for separately.
     */
    dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);

    dc->T0 = tcg_temp_new();
    dc->T1 = tcg_temp_new();
    dc->A0 = tcg_temp_new();

    dc->tmp0 = tcg_temp_new();
    dc->tmp1_i64 = tcg_temp_new_i64();
    dc->tmp2_i32 = tcg_temp_new_i32();
    dc->tmp3_i32 = tcg_temp_new_i32();
    dc->tmp4 = tcg_temp_new();
    dc->ptr0 = tcg_temp_new_ptr();
    dc->ptr1 = tcg_temp_new_ptr();
    dc->cc_srcT = tcg_temp_local_new();
}

static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
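    /* Nothing to do: all per-TB setup happens in init_disas_context. */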
}

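/*
 * Record the per-instruction start state.  The (pc, cc_op) pair emitted
 * here is what restore_state_to_opc() reads back when an exception
 * unwinds into the middle of this TB.
 */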
static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
}

static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_next;

#ifdef TARGET_VSYSCALL_PAGE
    /*
     * Detect entry into the vsyscall page and invoke the syscall.
     */
    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
        dc->base.pc_next = dc->pc + 1;
        return;
    }
#endif

    pc_next = disas_insn(dc, cpu);

    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
        /*
         * In single-step mode, we generate only one instruction and then
         * raise an exception.  If IRQs were inhibited via
         * HF_INHIBIT_IRQ_MASK, we clear the flag and end the block to
         * give pending interrupts a chance to be taken.
         */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
               && ((pc_next & TARGET_PAGE_MASK)
                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
                       & TARGET_PAGE_MASK)
                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
        /*
         * In icount mode a TB must not cross a page boundary, since the
         * crossing can raise an exception mid-block.  End the block when
         * the next instruction might cross one, so that only the first
         * instruction of a block ever does; if the current instruction
         * already crossed the boundary, that is fine, because no
         * exception stopped it from executing.
         */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
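        /*
         * Heuristic: stop translating well before the end of the page so
         * that the next instruction (at most 15 bytes on x86, including
         * prefixes) cannot run past it.
         */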
        dc->base.is_jmp = DISAS_TOO_MANY;
    }

    dc->base.pc_next = pc_next;
}

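/*
 * DISAS_TOO_MANY means translation stopped between two instructions
 * without emitting a branch, so synchronize EIP and close the TB
 * explicitly.
 */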
static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (dc->base.is_jmp == DISAS_TOO_MANY) {
        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
        gen_eob(dc);
    }
}

static void i386_tr_disas_log(const DisasContextBase *dcbase,
                              CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

static const TranslatorOps i386_tr_ops = {
    .init_disas_context = i386_tr_init_disas_context,
    .tb_start           = i386_tr_tb_start,
    .insn_start         = i386_tr_insn_start,
    .translate_insn     = i386_tr_translate_insn,
    .tb_stop            = i386_tr_tb_stop,
    .disas_log          = i386_tr_disas_log,
};

/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
    DisasContext dc;

    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
}

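/*
 * Restore CPU state for a precise exception: data[] holds the values
 * recorded by i386_tr_insn_start(), i.e. data[0] is the pc (eip +
 * cs_base) and data[1] the lazy cc_op at the start of the faulting
 * instruction.
 */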
void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
                          target_ulong *data)
{
    int cc_op = data[1];
    env->eip = data[0] - tb->cs_base;
    if (cc_op != CC_OP_DYNAMIC) {
        env->cc_op = cc_op;
    }
}