/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

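/*
 * A ModRM byte is laid out as mod(2) | reg-or-op(3) | rm(3).  Mod values
 * 0..2 select a memory operand and mod 3 a register operand, which is why
 * the _MEM_ variant above omits the (3 << 6) row.  As an illustrative
 * example (not from the original source), CASE_MODRM_MEM_OP(1) expands to:
 *
 *     case 0x08 ... 0x0f:   // mod == 0
 *     case 0x48 ... 0x4f:   // mod == 1
 *     case 0x88 ... 0x8f:   // mod == 2
 */
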
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need neither sprinkle
 * ifdefs through the translator nor provide the helper functions.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

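/*
 * The table above drives QEMU's lazy condition-code scheme: instructions
 * record their operands and result in cc_dst/cc_src/cc_src2 plus an
 * operation kind in cc_op, and EFLAGS bits are only derived when actually
 * consumed.  An illustrative sketch (register values hypothetical): after
 * a 32-bit "sub %ebx, %eax" the translator leaves
 *
 *     cc_srcT = old EAX        (minuend, the extra SRCT global)
 *     cc_src  = EBX            (subtrahend)
 *     cc_dst  = old EAX - EBX  (result)
 *     cc_op   = CC_OP_SUBL
 *
 * so that e.g. ZF can later be computed as (uint32_t)cc_dst == 0 without
 * EFLAGS ever having been materialized at the SUB itself.
 */
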
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N in 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".  Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}

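/*
 * Example: with no REX prefix, byte register 4 encodes AH, so
 * byte_reg_is_xH(s, 4) returns true and the access targets bits 15..8
 * of EAX; with any REX prefix present the same encoding means SPL
 * (the low byte of RSP) and the function returns false.
 */
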
/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

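/*
 * A worked example of the 16-bit case above, assuming ADDSEG is set and
 * there is no override: for an access through (%bx) with DS as the
 * default segment, A0 becomes zext16(EBX) + DS base, and since CODE64 is
 * false the sum is then truncated to 32 bits, matching the 32-bit
 * linear-address space outside of long mode.
 */
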
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1 << ot) is allowed.
 * Raise #GP, or a VMM exit, if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}

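/*
 * Two independent checks may be emitted above: the TSS
 * I/O-permission-bitmap check (gen_helper_check_io) whenever the access
 * is not privileged outright (CPL > IOPL, or vm86 mode), and the SVM
 * IOIO intercept when translating guest code.  The SVM path syncs cc_op
 * and EIP first, since a #VMEXIT must observe the exact pre-instruction
 * state.
 */
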
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

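/*
 * A CCPrepare describes how to test one flag without materializing
 * EFLAGS: evaluate cond(reg & mask, imm), or cond(reg, reg2) when
 * use_reg2 is set.  A minimal consumer sketch, essentially what
 * gen_jcc1_noeob() below does (the use_reg2 case, omitted here, uses
 * tcg_gen_brcond_tl instead):
 *
 *     CCPrepare cc = gen_prepare_eflags_c(s, s->T0);
 *     if (cc.mask != -1) {
 *         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
 *         cc.reg = s->T0;
 *     }
 *     tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, label);
 */
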
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

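/*
 * In gen_prepare_cc() below, 'b' is the low nibble of a jcc opcode:
 * bit 0 inverts the sense and bits 3..1 index the JCC_* enum above.
 * For example "jne" (opcode 0x75) yields b = 5 = (JCC_Z << 1) | 1,
 * i.e. the inverted Z test.
 */
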
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

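/*
 * The difference from gen_jcc1_noeob() is deliberate: once a branch is
 * emitted, the translator cannot track a single static cc_op across the
 * two control-flow paths, so gen_jcc1() flushes the current cc_op to
 * memory and downgrades to CC_OP_DYNAMIC before branching.
 */
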
/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

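/*
 * Illustrative shape of the code generated for "rep movs" by
 * GEN_REPZ(movs) above (labels as created in gen_jz_ecx_string):
 *
 *         if (ECX != 0) goto l1;
 *     l2: goto insn_after_rep;      // chains to the next TB
 *     l1: <one movs iteration>
 *         ECX -= 1;
 *         if (repz_opt && ECX == 0) goto l2;
 *         goto rep_insn_itself;     // loops via a fresh TB entry
 *
 * i.e. at most one iteration per TB execution, which keeps string loops
 * interruptible and keeps single-stepping sane.
 */
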
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

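/*
 * Ops 2 (FCOM) and 3 (FCOMP) above deliberately generate the same compare
 * helper: the extra register-stack pop of FCOMP is emitted separately by
 * the caller (via gen_helper_fpop) rather than here.
 */
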
/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

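/*
 * Note how the LOCKed variants above never use the load/op/store
 * sequence: they map straight onto TCG atomic ops so the
 * read-modify-write stays indivisible under MTTCG.  There is no atomic
 * subtract, hence the negate-and-add in OP_SUBL:
 *
 *     tcg_gen_neg_tl(s1->T0, s1->T1);
 *     tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
 *                                 s1->mem_index, ot | MO_LE);
 *
 * which as a bonus leaves the fetched old value in cc_srcT for the
 * lazy flags.
 */
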
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

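/*
 * The movcond dance above exists because an x86 shift by a runtime count
 * of zero must leave every flag, and therefore cc_op, exactly as it was.
 * The price of deciding at run time is that cc_op becomes CC_OP_DYNAMIC,
 * so any later flag read takes the slow helper path.
 */
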
1568 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1569                             int is_right, int is_arith)
1570 {
1571     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1572 
1573     /* load */
1574     if (op1 == OR_TMP0) {
1575         gen_op_ld_v(s, ot, s->T0, s->A0);
1576     } else {
1577         gen_op_mov_v_reg(s, ot, s->T0, op1);
1578     }
1579 
1580     tcg_gen_andi_tl(s->T1, s->T1, mask);
1581     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1582 
1583     if (is_right) {
1584         if (is_arith) {
1585             gen_exts(ot, s->T0);
1586             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1587             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1588         } else {
1589             gen_extu(ot, s->T0);
1590             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1591             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1592         }
1593     } else {
1594         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1595         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1596     }
1597 
1598     /* store */
1599     gen_op_st_rm_T0_A0(s, ot, op1);
1600 
1601     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1602 }
1603 
1604 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1605                             int is_right, int is_arith)
1606 {
1607     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1608 
1609     /* load */
1610     if (op1 == OR_TMP0)
1611         gen_op_ld_v(s, ot, s->T0, s->A0);
1612     else
1613         gen_op_mov_v_reg(s, ot, s->T0, op1);
1614 
1615     op2 &= mask;
1616     if (op2 != 0) {
1617         if (is_right) {
1618             if (is_arith) {
1619                 gen_exts(ot, s->T0);
1620                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1621                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1622             } else {
1623                 gen_extu(ot, s->T0);
1624                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1625                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1626             }
1627         } else {
1628             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1629             tcg_gen_shli_tl(s->T0, s->T0, op2);
1630         }
1631     }
1632 
1633     /* store */
1634     gen_op_st_rm_T0_A0(s, ot, op1);
1635 
1636     /* update eflags if non zero shift */
1637     if (op2 != 0) {
1638         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1639         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1640         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1641     }
1642 }
1643 
1644 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1645 {
1646     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1647     TCGv_i32 t0, t1;
1648 
1649     /* load */
1650     if (op1 == OR_TMP0) {
1651         gen_op_ld_v(s, ot, s->T0, s->A0);
1652     } else {
1653         gen_op_mov_v_reg(s, ot, s->T0, op1);
1654     }
1655 
1656     tcg_gen_andi_tl(s->T1, s->T1, mask);
1657 
1658     switch (ot) {
1659     case MO_8:
1660         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1661         tcg_gen_ext8u_tl(s->T0, s->T0);
1662         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1663         goto do_long;
1664     case MO_16:
1665         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1666         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1667         goto do_long;
1668     do_long:
1669 #ifdef TARGET_X86_64
1670     case MO_32:
1671         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1672         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1673         if (is_right) {
1674             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1675         } else {
1676             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1677         }
1678         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1679         break;
1680 #endif
1681     default:
1682         if (is_right) {
1683             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1684         } else {
1685             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1686         }
1687         break;
1688     }
1689 
1690     /* store */
1691     gen_op_st_rm_T0_A0(s, ot, op1);
1692 
1693     /* We'll need the flags computed into CC_SRC.  */
1694     gen_compute_eflags(s);
1695 
1696     /* The value that was "rotated out" is now present at the other end
1697        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1698        since we've computed the flags into CC_SRC, these variables are
1699        currently dead.  */
1700     if (is_right) {
1701         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1702         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1703         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1704     } else {
1705         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1706         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1707     }
1708     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1709     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1710 
1711     /* Now conditionally store the new CC_OP value.  If the shift count
1712        is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1713        Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1714        exactly as we computed above.  */
1715     t0 = tcg_const_i32(0);
1716     t1 = tcg_temp_new_i32();
1717     tcg_gen_trunc_tl_i32(t1, s->T1);
1718     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1719     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1720     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1721                         s->tmp2_i32, s->tmp3_i32);
1722     tcg_temp_free_i32(t0);
1723     tcg_temp_free_i32(t1);
1724 
1725     /* The CC_OP value is no longer predictable.  */
1726     set_cc_op(s, CC_OP_DYNAMIC);
1727 }
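
/*
 * Worked example for the C/O extraction above (values illustrative):
 * an 8-bit ROL of 0x81 by 1, after replication to 0x81818181 so that
 * bit 31 of the 32-bit result mirrors bit 7 of the byte:
 *
 *     result = rol32(0x81818181, 1)     = 0x03030303
 *     CF (CC_DST)  = result & 1         = 1   (bit rotated out of the MSB)
 *     OF (CC_SRC2) = bit31(result) ^ CF = 0 ^ 1 = 1
 */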
1728 
1729 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1730                           int is_right)
1731 {
1732     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1733     int shift;
1734 
1735     /* load */
1736     if (op1 == OR_TMP0) {
1737         gen_op_ld_v(s, ot, s->T0, s->A0);
1738     } else {
1739         gen_op_mov_v_reg(s, ot, s->T0, op1);
1740     }
1741 
1742     op2 &= mask;
1743     if (op2 != 0) {
1744         switch (ot) {
1745 #ifdef TARGET_X86_64
1746         case MO_32:
1747             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1748             if (is_right) {
1749                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1750             } else {
1751                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1752             }
1753             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1754             break;
1755 #endif
1756         default:
1757             if (is_right) {
1758                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1759             } else {
1760                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1761             }
1762             break;
1763         case MO_8:
1764             mask = 7;
1765             goto do_shifts;
1766         case MO_16:
1767             mask = 15;
1768         do_shifts:
1769             shift = op2 & mask;
1770             if (is_right) {
1771                 shift = mask + 1 - shift;
1772             }
1773             gen_extu(ot, s->T0);
1774             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1775             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1776             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1777             break;
1778         }
1779     }
1780 
1781     /* store */
1782     gen_op_st_rm_T0_A0(s, ot, op1);
1783 
1784     if (op2 != 0) {
1785         /* Compute the flags into CC_SRC.  */
1786         gen_compute_eflags(s);
1787 
1788         /* The value that was "rotated out" is now present at the other end
1789            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1790            since we've computed the flags into CC_SRC, these variables are
1791            currently dead.  */
1792         if (is_right) {
1793             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1794             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1795             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1796         } else {
1797             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1798             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1799         }
1800         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1801         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1802         set_cc_op(s, CC_OP_ADCOX);
1803     }
1804 }
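
/*
 * Note on the do_shifts path above: small rotates become a shift pair.
 * E.g. an 8-bit ROR by 3 uses shift = 7 + 1 - 3 = 5, producing
 * (x << 5) | (x >> 3) on the zero-extended value, which is ror8(x, 3)
 * in the low eight bits.
 */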
1805 
1806 /* XXX: add faster immediate = 1 case */
1807 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1808                            int is_right)
1809 {
1810     gen_compute_eflags(s);
1811     assert(s->cc_op == CC_OP_EFLAGS);
1812 
1813     /* load */
1814     if (op1 == OR_TMP0)
1815         gen_op_ld_v(s, ot, s->T0, s->A0);
1816     else
1817         gen_op_mov_v_reg(s, ot, s->T0, op1);
1818 
1819     if (is_right) {
1820         switch (ot) {
1821         case MO_8:
1822             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1823             break;
1824         case MO_16:
1825             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1826             break;
1827         case MO_32:
1828             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1829             break;
1830 #ifdef TARGET_X86_64
1831         case MO_64:
1832             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1833             break;
1834 #endif
1835         default:
1836             tcg_abort();
1837         }
1838     } else {
1839         switch (ot) {
1840         case MO_8:
1841             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1842             break;
1843         case MO_16:
1844             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1845             break;
1846         case MO_32:
1847             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1848             break;
1849 #ifdef TARGET_X86_64
1850         case MO_64:
1851             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1852             break;
1853 #endif
1854         default:
1855             tcg_abort();
1856         }
1857     }
1858     /* store */
1859     gen_op_st_rm_T0_A0(s, ot, op1);
1860 }
1861 
1862 /* XXX: add faster immediate case */
1863 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1864                              bool is_right, TCGv count_in)
1865 {
1866     target_ulong mask = (ot == MO_64 ? 63 : 31);
1867     TCGv count;
1868 
1869     /* load */
1870     if (op1 == OR_TMP0) {
1871         gen_op_ld_v(s, ot, s->T0, s->A0);
1872     } else {
1873         gen_op_mov_v_reg(s, ot, s->T0, op1);
1874     }
1875 
1876     count = tcg_temp_new();
1877     tcg_gen_andi_tl(count, count_in, mask);
1878 
1879     switch (ot) {
1880     case MO_16:
1881         /* Note: we implement the Intel behaviour for shift count > 16.
1882            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1883            portion by constructing it as a 32-bit value.  */
1884         if (is_right) {
1885             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1886             tcg_gen_mov_tl(s->T1, s->T0);
1887             tcg_gen_mov_tl(s->T0, s->tmp0);
1888         } else {
1889             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1890         }
1891         /*
1892          * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1893          * otherwise fall through to the default case.
1894          */
1895     case MO_32:
1896 #ifdef TARGET_X86_64
1897         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1898         tcg_gen_subi_tl(s->tmp0, count, 1);
1899         if (is_right) {
1900             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1901             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1902             tcg_gen_shr_i64(s->T0, s->T0, count);
1903         } else {
1904             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1905             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1906             tcg_gen_shl_i64(s->T0, s->T0, count);
1907             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1908             tcg_gen_shri_i64(s->T0, s->T0, 32);
1909         }
1910         break;
1911 #endif
1912     default:
1913         tcg_gen_subi_tl(s->tmp0, count, 1);
1914         if (is_right) {
1915             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1916 
1917             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1918             tcg_gen_shr_tl(s->T0, s->T0, count);
1919             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1920         } else {
1921             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1922             if (ot == MO_16) {
1923                 /* Only needed if count > 16, for Intel behaviour.  */
1924                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1925                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1926                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1927             }
1928 
1929             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1930             tcg_gen_shl_tl(s->T0, s->T0, count);
1931             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1932         }
1933         tcg_gen_movi_tl(s->tmp4, 0);
1934         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1935                            s->tmp4, s->T1);
1936         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1937         break;
1938     }
1939 
1940     /* store */
1941     gen_op_st_rm_T0_A0(s, ot, op1);
1942 
1943     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1944     tcg_temp_free(count);
1945 }
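
/*
 * Worked example for the MO_16 Intel behaviour above (illustrative
 * values): "shrdw %cl, %bx, %ax" with AX = 0x1111, BX = 0x2222 and
 * CL = 20 shifts the pattern AX:BX:AX = 0x1111_2222_1111 right by 20,
 * leaving AX = 0x1222 -- bits of the destination itself reappear.
 */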
1946 
1947 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1948 {
1949     if (s != OR_TMP1)
1950         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1951     switch(op) {
1952     case OP_ROL:
1953         gen_rot_rm_T1(s1, ot, d, 0);
1954         break;
1955     case OP_ROR:
1956         gen_rot_rm_T1(s1, ot, d, 1);
1957         break;
1958     case OP_SHL:
1959     case OP_SHL1:
1960         gen_shift_rm_T1(s1, ot, d, 0, 0);
1961         break;
1962     case OP_SHR:
1963         gen_shift_rm_T1(s1, ot, d, 1, 0);
1964         break;
1965     case OP_SAR:
1966         gen_shift_rm_T1(s1, ot, d, 1, 1);
1967         break;
1968     case OP_RCL:
1969         gen_rotc_rm_T1(s1, ot, d, 0);
1970         break;
1971     case OP_RCR:
1972         gen_rotc_rm_T1(s1, ot, d, 1);
1973         break;
1974     }
1975 }
1976 
1977 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1978 {
1979     switch(op) {
1980     case OP_ROL:
1981         gen_rot_rm_im(s1, ot, d, c, 0);
1982         break;
1983     case OP_ROR:
1984         gen_rot_rm_im(s1, ot, d, c, 1);
1985         break;
1986     case OP_SHL:
1987     case OP_SHL1:
1988         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1989         break;
1990     case OP_SHR:
1991         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1992         break;
1993     case OP_SAR:
1994         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1995         break;
1996     default:
1997         /* currently not optimized */
1998         tcg_gen_movi_tl(s1->T1, c);
1999         gen_shift(s1, op, ot, d, OR_TMP1);
2000         break;
2001     }
2002 }
2003 
2004 #define X86_MAX_INSN_LENGTH 15
2005 
2006 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2007 {
2008     uint64_t pc = s->pc;
2009 
2010     s->pc += num_bytes;
2011     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2012         /* If the instruction's 16th byte is on a different page than the 1st, a
2013          * page fault on the second page wins over the general protection fault
2014          * caused by the instruction being too long.
2015          * This can happen even if the operand is only one byte long!
2016          */
2017         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2018             volatile uint8_t unused =
2019                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2020             (void) unused;
2021         }
2022         siglongjmp(s->jmpbuf, 1);
2023     }
2024 
2025     return pc;
2026 }
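
/*
 * Sketch of the cross-page test above, assuming 4 KiB pages: if the
 * bytes decoded so far end at 0x1000ffe and an over-long fetch advances
 * s->pc to 0x1001002, then (0x1001001 ^ 0x1000ffe) & ~0xfff is nonzero,
 * so the dummy cpu_ldub_code() load from 0x1001000 gives a #PF on the
 * second page priority over the #GP for exceeding 15 bytes.
 */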
2027 
2028 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2029 {
2030     return translator_ldub(env, advance_pc(env, s, 1));
2031 }
2032 
2033 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2034 {
2035     return translator_ldsw(env, advance_pc(env, s, 2));
2036 }
2037 
2038 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2039 {
2040     return translator_lduw(env, advance_pc(env, s, 2));
2041 }
2042 
2043 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2044 {
2045     return translator_ldl(env, advance_pc(env, s, 4));
2046 }
2047 
2048 #ifdef TARGET_X86_64
2049 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2050 {
2051     return translator_ldq(env, advance_pc(env, s, 8));
2052 }
2053 #endif
2054 
2055 /* Decompose an address.  */
2056 
2057 typedef struct AddressParts {
2058     int def_seg;
2059     int base;
2060     int index;
2061     int scale;
2062     target_long disp;
2063 } AddressParts;
2064 
2065 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2066                                     int modrm)
2067 {
2068     int def_seg, base, index, scale, mod, rm;
2069     target_long disp;
2070     bool havesib;
2071 
2072     def_seg = R_DS;
2073     index = -1;
2074     scale = 0;
2075     disp = 0;
2076 
2077     mod = (modrm >> 6) & 3;
2078     rm = modrm & 7;
2079     base = rm | REX_B(s);
2080 
2081     if (mod == 3) {
2082         /* Normally filtered out earlier, but including this path
2083            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2084         goto done;
2085     }
2086 
2087     switch (s->aflag) {
2088     case MO_64:
2089     case MO_32:
2090         havesib = 0;
2091         if (rm == 4) {
2092             int code = x86_ldub_code(env, s);
2093             scale = (code >> 6) & 3;
2094             index = ((code >> 3) & 7) | REX_X(s);
2095             if (index == 4) {
2096                 index = -1;  /* no index */
2097             }
2098             base = (code & 7) | REX_B(s);
2099             havesib = 1;
2100         }
2101 
2102         switch (mod) {
2103         case 0:
2104             if ((base & 7) == 5) {
2105                 base = -1;
2106                 disp = (int32_t)x86_ldl_code(env, s);
2107                 if (CODE64(s) && !havesib) {
2108                     base = -2;
2109                     disp += s->pc + s->rip_offset;
2110                 }
2111             }
2112             break;
2113         case 1:
2114             disp = (int8_t)x86_ldub_code(env, s);
2115             break;
2116         default:
2117         case 2:
2118             disp = (int32_t)x86_ldl_code(env, s);
2119             break;
2120         }
2121 
2122         /* For correct popl handling with esp.  */
2123         if (base == R_ESP && s->popl_esp_hack) {
2124             disp += s->popl_esp_hack;
2125         }
2126         if (base == R_EBP || base == R_ESP) {
2127             def_seg = R_SS;
2128         }
2129         break;
2130 
2131     case MO_16:
2132         if (mod == 0) {
2133             if (rm == 6) {
2134                 base = -1;
2135                 disp = x86_lduw_code(env, s);
2136                 break;
2137             }
2138         } else if (mod == 1) {
2139             disp = (int8_t)x86_ldub_code(env, s);
2140         } else {
2141             disp = (int16_t)x86_lduw_code(env, s);
2142         }
2143 
2144         switch (rm) {
2145         case 0:
2146             base = R_EBX;
2147             index = R_ESI;
2148             break;
2149         case 1:
2150             base = R_EBX;
2151             index = R_EDI;
2152             break;
2153         case 2:
2154             base = R_EBP;
2155             index = R_ESI;
2156             def_seg = R_SS;
2157             break;
2158         case 3:
2159             base = R_EBP;
2160             index = R_EDI;
2161             def_seg = R_SS;
2162             break;
2163         case 4:
2164             base = R_ESI;
2165             break;
2166         case 5:
2167             base = R_EDI;
2168             break;
2169         case 6:
2170             base = R_EBP;
2171             def_seg = R_SS;
2172             break;
2173         default:
2174         case 7:
2175             base = R_EBX;
2176             break;
2177         }
2178         break;
2179 
2180     default:
2181         tcg_abort();
2182     }
2183 
2184  done:
2185     return (AddressParts){ def_seg, base, index, scale, disp };
2186 }
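
/*
 * Worked example for the decoder above, assuming 32-bit address size:
 * the byte sequence 44 58 10 gives mod=1, rm=4 (SIB follows); SIB 0x58
 * yields scale=1, index=3 (EBX), base=0 (EAX); then disp8 = 0x10.
 * The result is (AddressParts){ R_DS, R_EAX, R_EBX, 1, 0x10 },
 * i.e. DS:[EAX + EBX * 2 + 0x10].
 */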
2187 
2188 /* Compute the address, with a minimum number of TCG ops.  */
2189 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2190 {
2191     TCGv ea = NULL;
2192 
2193     if (a.index >= 0) {
2194         if (a.scale == 0) {
2195             ea = cpu_regs[a.index];
2196         } else {
2197             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2198             ea = s->A0;
2199         }
2200         if (a.base >= 0) {
2201             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2202             ea = s->A0;
2203         }
2204     } else if (a.base >= 0) {
2205         ea = cpu_regs[a.base];
2206     }
2207     if (!ea) {
2208         tcg_gen_movi_tl(s->A0, a.disp);
2209         ea = s->A0;
2210     } else if (a.disp != 0) {
2211         tcg_gen_addi_tl(s->A0, ea, a.disp);
2212         ea = s->A0;
2213     }
2214 
2215     return ea;
2216 }
2217 
2218 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2219 {
2220     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2221     TCGv ea = gen_lea_modrm_1(s, a);
2222     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2223 }
2224 
2225 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2226 {
2227     (void)gen_lea_modrm_0(env, s, modrm);
2228 }
2229 
2230 /* Used for BNDCL, BNDCU, BNDCN.  */
2231 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2232                       TCGCond cond, TCGv_i64 bndv)
2233 {
2234     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2235 
2236     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2237     if (!CODE64(s)) {
2238         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2239     }
2240     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2241     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2242     gen_helper_bndck(cpu_env, s->tmp2_i32);
2243 }
2244 
2245 /* used for LEA and MOV AX, mem */
2246 static void gen_add_A0_ds_seg(DisasContext *s)
2247 {
2248     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2249 }
2250 
2251 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2252    OR_TMP0 */
2253 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2254                            MemOp ot, int reg, int is_store)
2255 {
2256     int mod, rm;
2257 
2258     mod = (modrm >> 6) & 3;
2259     rm = (modrm & 7) | REX_B(s);
2260     if (mod == 3) {
2261         if (is_store) {
2262             if (reg != OR_TMP0)
2263                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2264             gen_op_mov_reg_v(s, ot, rm, s->T0);
2265         } else {
2266             gen_op_mov_v_reg(s, ot, s->T0, rm);
2267             if (reg != OR_TMP0)
2268                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2269         }
2270     } else {
2271         gen_lea_modrm(env, s, modrm);
2272         if (is_store) {
2273             if (reg != OR_TMP0)
2274                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2275             gen_op_st_v(s, ot, s->T0, s->A0);
2276         } else {
2277             gen_op_ld_v(s, ot, s->T0, s->A0);
2278             if (reg != OR_TMP0)
2279                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2280         }
2281     }
2282 }
2283 
2284 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2285 {
2286     uint32_t ret;
2287 
2288     switch (ot) {
2289     case MO_8:
2290         ret = x86_ldub_code(env, s);
2291         break;
2292     case MO_16:
2293         ret = x86_lduw_code(env, s);
2294         break;
2295     case MO_32:
2296 #ifdef TARGET_X86_64
2297     case MO_64:
2298 #endif
2299         ret = x86_ldl_code(env, s);
2300         break;
2301     default:
2302         tcg_abort();
2303     }
2304     return ret;
2305 }
2306 
2307 static inline int insn_const_size(MemOp ot)
2308 {
2309     if (ot <= MO_32) {
2310         return 1 << ot;
2311     } else {
2312         return 4;
2313     }
2314 }
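
/*
 * Note: even with a 64-bit operand size most immediates remain 4 bytes
 * and are sign-extended by the consumer; only MOV with a 64-bit
 * immediate fetches a full 8 bytes, read separately via x86_ldq_code().
 */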
2315 
2316 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2317 {
2318     target_ulong pc = s->cs_base + eip;
2319 
2320     if (translator_use_goto_tb(&s->base, pc))  {
2321         /* jump to same page: we can use a direct jump */
2322         tcg_gen_goto_tb(tb_num);
2323         gen_jmp_im(s, eip);
2324         tcg_gen_exit_tb(s->base.tb, tb_num);
2325         s->base.is_jmp = DISAS_NORETURN;
2326     } else {
2327         /* jump to another page */
2328         gen_jmp_im(s, eip);
2329         gen_jr(s, s->tmp0);
2330     }
2331 }
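
/*
 * Note: among other checks, translator_use_goto_tb() only allows direct
 * chaining when the destination stays on the same guest page; otherwise
 * eip is stored and gen_jr() falls back to looking up the next TB from
 * the CPU state.
 */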
2332 
2333 static inline void gen_jcc(DisasContext *s, int b,
2334                            target_ulong val, target_ulong next_eip)
2335 {
2336     TCGLabel *l1, *l2;
2337 
2338     if (s->jmp_opt) {
2339         l1 = gen_new_label();
2340         gen_jcc1(s, b, l1);
2341 
2342         gen_goto_tb(s, 0, next_eip);
2343 
2344         gen_set_label(l1);
2345         gen_goto_tb(s, 1, val);
2346     } else {
2347         l1 = gen_new_label();
2348         l2 = gen_new_label();
2349         gen_jcc1(s, b, l1);
2350 
2351         gen_jmp_im(s, next_eip);
2352         tcg_gen_br(l2);
2353 
2354         gen_set_label(l1);
2355         gen_jmp_im(s, val);
2356         gen_set_label(l2);
2357         gen_eob(s);
2358     }
2359 }
2360 
2361 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2362                         int modrm, int reg)
2363 {
2364     CCPrepare cc;
2365 
2366     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2367 
2368     cc = gen_prepare_cc(s, b, s->T1);
2369     if (cc.mask != -1) {
2370         TCGv t0 = tcg_temp_new();
2371         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2372         cc.reg = t0;
2373     }
2374     if (!cc.use_reg2) {
2375         cc.reg2 = tcg_const_tl(cc.imm);
2376     }
2377 
2378     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2379                        s->T0, cpu_regs[reg]);
2380     gen_op_mov_reg_v(s, ot, reg, s->T0);
2381 
2382     if (cc.mask != -1) {
2383         tcg_temp_free(cc.reg);
2384     }
2385     if (!cc.use_reg2) {
2386         tcg_temp_free(cc.reg2);
2387     }
2388 }
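
/*
 * Note: as on hardware, the source operand of CMOVcc is read
 * unconditionally above (a memory operand may fault even for a false
 * condition); only the register write-back is conditional, done
 * branch-free with movcond.
 */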
2389 
2390 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2391 {
2392     tcg_gen_ld32u_tl(s->T0, cpu_env,
2393                      offsetof(CPUX86State,segs[seg_reg].selector));
2394 }
2395 
2396 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2397 {
2398     tcg_gen_ext16u_tl(s->T0, s->T0);
2399     tcg_gen_st32_tl(s->T0, cpu_env,
2400                     offsetof(CPUX86State,segs[seg_reg].selector));
2401     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2402 }
2403 
2404 /* Move T0 to seg_reg and determine whether the CPU state may change.
2405    Never call this function with seg_reg == R_CS.  */
2406 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2407 {
2408     if (PE(s) && !VM86(s)) {
2409         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2410         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2411         /* Abort translation because the addseg value may change or
2412            because ss32 may change.  For R_SS, translation must always
2413            stop, since special handling is required to disable hardware
2414            interrupts for the next instruction.  */
2415         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2416             s->base.is_jmp = DISAS_TOO_MANY;
2417         }
2418     } else {
2419         gen_op_movl_seg_T0_vm(s, seg_reg);
2420         if (seg_reg == R_SS) {
2421             s->base.is_jmp = DISAS_TOO_MANY;
2422         }
2423     }
2424 }
2425 
2426 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2427 {
2428     /* no SVM activated; fast case */
2429     if (likely(!GUEST(s))) {
2430         return;
2431     }
2432     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2433 }
2434 
2435 static inline void gen_stack_update(DisasContext *s, int addend)
2436 {
2437     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2438 }
2439 
2440 /* Generate a push. It depends on ss32, addseg and dflag.  */
2441 static void gen_push_v(DisasContext *s, TCGv val)
2442 {
2443     MemOp d_ot = mo_pushpop(s, s->dflag);
2444     MemOp a_ot = mo_stacksize(s);
2445     int size = 1 << d_ot;
2446     TCGv new_esp = s->A0;
2447 
2448     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2449 
2450     if (!CODE64(s)) {
2451         if (ADDSEG(s)) {
2452             new_esp = s->tmp4;
2453             tcg_gen_mov_tl(new_esp, s->A0);
2454         }
2455         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2456     }
2457 
2458     gen_op_st_v(s, d_ot, val, s->A0);
2459     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2460 }
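
/*
 * Note on gen_push_v: when ADDSEG is in effect, gen_lea_v_seg rewrites
 * A0 into a linear (segment-adjusted) address, so the unadjusted stack
 * offset is stashed in tmp4 first -- ESP must be written back with the
 * offset, not with the linear address.
 */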
2461 
2462 /* A two-step pop is necessary for precise exceptions.  */
2463 static MemOp gen_pop_T0(DisasContext *s)
2464 {
2465     MemOp d_ot = mo_pushpop(s, s->dflag);
2466 
2467     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2468     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2469 
2470     return d_ot;
2471 }
2472 
2473 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2474 {
2475     gen_stack_update(s, 1 << ot);
2476 }
2477 
2478 static inline void gen_stack_A0(DisasContext *s)
2479 {
2480     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2481 }
2482 
2483 static void gen_pusha(DisasContext *s)
2484 {
2485     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2486     MemOp d_ot = s->dflag;
2487     int size = 1 << d_ot;
2488     int i;
2489 
2490     for (i = 0; i < 8; i++) {
2491         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2492         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2493         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2494     }
2495 
2496     gen_stack_update(s, -8 * size);
2497 }
2498 
2499 static void gen_popa(DisasContext *s)
2500 {
2501     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2502     MemOp d_ot = s->dflag;
2503     int size = 1 << d_ot;
2504     int i;
2505 
2506     for (i = 0; i < 8; i++) {
2507         /* ESP is not reloaded */
2508         if (7 - i == R_ESP) {
2509             continue;
2510         }
2511         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2512         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2513         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2514         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2515     }
2516 
2517     gen_stack_update(s, 8 * size);
2518 }
2519 
2520 static void gen_enter(DisasContext *s, int esp_addend, int level)
2521 {
2522     MemOp d_ot = mo_pushpop(s, s->dflag);
2523     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2524     int size = 1 << d_ot;
2525 
2526     /* Push BP; compute FrameTemp into T1.  */
2527     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2528     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2529     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2530 
2531     level &= 31;
2532     if (level != 0) {
2533         int i;
2534 
2535         /* Copy level-1 pointers from the previous frame.  */
2536         for (i = 1; i < level; ++i) {
2537             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2538             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2539             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2540 
2541             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2542             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2543             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2544         }
2545 
2546         /* Push the current FrameTemp as the last level.  */
2547         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2548         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2549         gen_op_st_v(s, d_ot, s->T1, s->A0);
2550     }
2551 
2552     /* Copy the FrameTemp value to EBP.  */
2553     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2554 
2555     /* Compute the final value of ESP.  */
2556     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2557     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2558 }
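
/*
 * Worked example for gen_enter (illustrative): "enter $8, $2" with a
 * 32-bit stack pushes EBP, copies one saved frame pointer from
 * [EBP - 4] to [FrameTemp - 4], pushes FrameTemp itself at
 * [FrameTemp - 8], then sets EBP = FrameTemp and
 * ESP = FrameTemp - (8 + 4 * 2).
 */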
2559 
2560 static void gen_leave(DisasContext *s)
2561 {
2562     MemOp d_ot = mo_pushpop(s, s->dflag);
2563     MemOp a_ot = mo_stacksize(s);
2564 
2565     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2566     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2567 
2568     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2569 
2570     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2571     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2572 }
2573 
2574 /* Similarly, except that the assumption here is that we don't decode
2575    the instruction at all -- either a missing opcode, an unimplemented
2576    feature, or just a bogus instruction stream.  */
2577 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2578 {
2579     gen_illegal_opcode(s);
2580 
2581     if (qemu_loglevel_mask(LOG_UNIMP)) {
2582         FILE *logfile = qemu_log_lock();
2583         target_ulong pc = s->pc_start, end = s->pc;
2584 
2585         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2586         for (; pc < end; ++pc) {
2587             qemu_log(" %02x", cpu_ldub_code(env, pc));
2588         }
2589         qemu_log("\n");
2590         qemu_log_unlock(logfile);
2591     }
2592 }
2593 
2594 /* an interrupt is different from an exception because of the
2595    privilege checks */
2596 static void gen_interrupt(DisasContext *s, int intno,
2597                           target_ulong cur_eip, target_ulong next_eip)
2598 {
2599     gen_update_cc_op(s);
2600     gen_jmp_im(s, cur_eip);
2601     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2602                                tcg_const_i32(next_eip - cur_eip));
2603     s->base.is_jmp = DISAS_NORETURN;
2604 }
2605 
2606 static void gen_debug(DisasContext *s)
2607 {
2608     gen_update_cc_op(s);
2609     gen_jmp_im(s, s->base.pc_next - s->cs_base);
2610     gen_helper_debug(cpu_env);
2611     s->base.is_jmp = DISAS_NORETURN;
2612 }
2613 
2614 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2615 {
2616     if ((s->flags & mask) == 0) {
2617         TCGv_i32 t = tcg_temp_new_i32();
2618         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2619         tcg_gen_ori_i32(t, t, mask);
2620         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2621         tcg_temp_free_i32(t);
2622         s->flags |= mask;
2623     }
2624 }
2625 
2626 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2627 {
2628     if (s->flags & mask) {
2629         TCGv_i32 t = tcg_temp_new_i32();
2630         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2631         tcg_gen_andi_i32(t, t, ~mask);
2632         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2633         tcg_temp_free_i32(t);
2634         s->flags &= ~mask;
2635     }
2636 }
2637 
2638 /* Clear BND registers during legacy branches.  */
2639 static void gen_bnd_jmp(DisasContext *s)
2640 {
2641     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2642        and if the BNDREGs are known to be in use (non-zero) already.
2643        The helper itself will check BNDPRESERVE at runtime.  */
2644     if ((s->prefix & PREFIX_REPNZ) == 0
2645         && (s->flags & HF_MPX_EN_MASK) != 0
2646         && (s->flags & HF_MPX_IU_MASK) != 0) {
2647         gen_helper_bnd_jmp(cpu_env);
2648     }
2649 }
2650 
2651 /* Generate an end of block. Trace exception is also generated if needed.
2652    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2653    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2654    S->TF.  This is used by the syscall/sysret insns.  */
2655 static void
2656 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2657 {
2658     gen_update_cc_op(s);
2659 
2660     /* If several instructions disable interrupts, only the first does it.  */
2661     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2662         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2663     } else {
2664         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2665     }
2666 
2667     if (s->base.tb->flags & HF_RF_MASK) {
2668         gen_helper_reset_rf(cpu_env);
2669     }
2670     if (s->base.singlestep_enabled) {
2671         gen_helper_debug(cpu_env);
2672     } else if (recheck_tf) {
2673         gen_helper_rechecking_single_step(cpu_env);
2674         tcg_gen_exit_tb(NULL, 0);
2675     } else if (s->flags & HF_TF_MASK) {
2676         gen_helper_single_step(cpu_env);
2677     } else if (jr) {
2678         tcg_gen_lookup_and_goto_ptr();
2679     } else {
2680         tcg_gen_exit_tb(NULL, 0);
2681     }
2682     s->base.is_jmp = DISAS_NORETURN;
2683 }
2684 
2685 static inline void
2686 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2687 {
2688     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2689 }
2690 
2691 /* End of block.
2692    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2693 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2694 {
2695     gen_eob_worker(s, inhibit, false);
2696 }
2697 
2698 /* End of block, resetting the inhibit irq flag.  */
2699 static void gen_eob(DisasContext *s)
2700 {
2701     gen_eob_worker(s, false, false);
2702 }
2703 
2704 /* Jump to register */
2705 static void gen_jr(DisasContext *s, TCGv dest)
2706 {
2707     do_gen_eob_worker(s, false, false, true);
2708 }
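
/*
 * Note: the dest argument is unused; callers such as gen_goto_tb have
 * already stored the target eip (via gen_jmp_im), and
 * tcg_gen_lookup_and_goto_ptr() finds the next TB from the CPU state.
 */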
2709 
2710 /* Generate a jump to eip.  No segment change may happen beforehand,
2711    since a direct jump to the next block may occur.  */
2712 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2713 {
2714     gen_update_cc_op(s);
2715     set_cc_op(s, CC_OP_DYNAMIC);
2716     if (s->jmp_opt) {
2717         gen_goto_tb(s, tb_num, eip);
2718     } else {
2719         gen_jmp_im(s, eip);
2720         gen_eob(s);
2721     }
2722 }
2723 
2724 static void gen_jmp(DisasContext *s, target_ulong eip)
2725 {
2726     gen_jmp_tb(s, eip, 0);
2727 }
2728 
2729 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2730 {
2731     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2732     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2733 }
2734 
2735 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2736 {
2737     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2738     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2739 }
2740 
2741 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2742 {
2743     int mem_index = s->mem_index;
2744     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2745     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2746     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2747     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2748     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2749 }
2750 
2751 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2752 {
2753     int mem_index = s->mem_index;
2754     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2755     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2756     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2757     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2758     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2759 }
2760 
2761 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2762 {
2763     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2764     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2765     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2766     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2767 }
2768 
2769 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2770 {
2771     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2772     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2773 }
2774 
2775 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2776 {
2777     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2778     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2779 }
2780 
2781 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2782 {
2783     tcg_gen_movi_i64(s->tmp1_i64, 0);
2784     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2785 }
2786 
2787 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2788 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2789 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2790 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2791 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2792 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2793                                TCGv_i32 val);
2794 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2795 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2796                                TCGv val);
2797 
2798 #define SSE_SPECIAL ((void *)1)
2799 #define SSE_DUMMY ((void *)2)
2800 
2801 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2802 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2803                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2804 
2805 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2806     /* 3DNow! extensions */
2807     [0x0e] = { SSE_DUMMY }, /* femms */
2808     [0x0f] = { SSE_DUMMY }, /* pf... */
2809     /* pure SSE operations */
2810     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2811     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2812     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2813     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2814     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2815     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2816     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2817     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2818 
2819     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2820     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2821     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2822     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2823     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2824     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2825     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2826     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2827     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2828     [0x51] = SSE_FOP(sqrt),
2829     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2830     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2831     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2832     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2833     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2834     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2835     [0x58] = SSE_FOP(add),
2836     [0x59] = SSE_FOP(mul),
2837     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2838                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2839     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2840     [0x5c] = SSE_FOP(sub),
2841     [0x5d] = SSE_FOP(min),
2842     [0x5e] = SSE_FOP(div),
2843     [0x5f] = SSE_FOP(max),
2844 
2845     [0xc2] = SSE_FOP(cmpeq),
2846     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2847                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2848 
2849     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2850     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2851     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2852 
2853     /* MMX ops and their SSE extensions */
2854     [0x60] = MMX_OP2(punpcklbw),
2855     [0x61] = MMX_OP2(punpcklwd),
2856     [0x62] = MMX_OP2(punpckldq),
2857     [0x63] = MMX_OP2(packsswb),
2858     [0x64] = MMX_OP2(pcmpgtb),
2859     [0x65] = MMX_OP2(pcmpgtw),
2860     [0x66] = MMX_OP2(pcmpgtl),
2861     [0x67] = MMX_OP2(packuswb),
2862     [0x68] = MMX_OP2(punpckhbw),
2863     [0x69] = MMX_OP2(punpckhwd),
2864     [0x6a] = MMX_OP2(punpckhdq),
2865     [0x6b] = MMX_OP2(packssdw),
2866     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2867     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2868     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2869     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2870     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2871                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2872                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2873                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2874     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2875     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2876     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2877     [0x74] = MMX_OP2(pcmpeqb),
2878     [0x75] = MMX_OP2(pcmpeqw),
2879     [0x76] = MMX_OP2(pcmpeql),
2880     [0x77] = { SSE_DUMMY }, /* emms */
2881     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2882     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2883     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2884     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2885     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2886     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2887     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2888     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2889     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2890     [0xd1] = MMX_OP2(psrlw),
2891     [0xd2] = MMX_OP2(psrld),
2892     [0xd3] = MMX_OP2(psrlq),
2893     [0xd4] = MMX_OP2(paddq),
2894     [0xd5] = MMX_OP2(pmullw),
2895     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2896     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2897     [0xd8] = MMX_OP2(psubusb),
2898     [0xd9] = MMX_OP2(psubusw),
2899     [0xda] = MMX_OP2(pminub),
2900     [0xdb] = MMX_OP2(pand),
2901     [0xdc] = MMX_OP2(paddusb),
2902     [0xdd] = MMX_OP2(paddusw),
2903     [0xde] = MMX_OP2(pmaxub),
2904     [0xdf] = MMX_OP2(pandn),
2905     [0xe0] = MMX_OP2(pavgb),
2906     [0xe1] = MMX_OP2(psraw),
2907     [0xe2] = MMX_OP2(psrad),
2908     [0xe3] = MMX_OP2(pavgw),
2909     [0xe4] = MMX_OP2(pmulhuw),
2910     [0xe5] = MMX_OP2(pmulhw),
2911     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2912     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2913     [0xe8] = MMX_OP2(psubsb),
2914     [0xe9] = MMX_OP2(psubsw),
2915     [0xea] = MMX_OP2(pminsw),
2916     [0xeb] = MMX_OP2(por),
2917     [0xec] = MMX_OP2(paddsb),
2918     [0xed] = MMX_OP2(paddsw),
2919     [0xee] = MMX_OP2(pmaxsw),
2920     [0xef] = MMX_OP2(pxor),
2921     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2922     [0xf1] = MMX_OP2(psllw),
2923     [0xf2] = MMX_OP2(pslld),
2924     [0xf3] = MMX_OP2(psllq),
2925     [0xf4] = MMX_OP2(pmuludq),
2926     [0xf5] = MMX_OP2(pmaddwd),
2927     [0xf6] = MMX_OP2(psadbw),
2928     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2929                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2930     [0xf8] = MMX_OP2(psubb),
2931     [0xf9] = MMX_OP2(psubw),
2932     [0xfa] = MMX_OP2(psubl),
2933     [0xfb] = MMX_OP2(psubq),
2934     [0xfc] = MMX_OP2(paddb),
2935     [0xfd] = MMX_OP2(paddw),
2936     [0xfe] = MMX_OP2(paddl),
2937 };
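
/*
 * Note: the second index of sse_op_table1 selects the mandatory-prefix
 * form of each opcode: [0] no prefix, [1] 0x66 (PREFIX_DATA),
 * [2] 0xf3 (PREFIX_REPZ), [3] 0xf2 (PREFIX_REPNZ), matching the b1
 * computation at the top of gen_sse().
 */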
2938 
2939 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2940     [0 + 2] = MMX_OP2(psrlw),
2941     [0 + 4] = MMX_OP2(psraw),
2942     [0 + 6] = MMX_OP2(psllw),
2943     [8 + 2] = MMX_OP2(psrld),
2944     [8 + 4] = MMX_OP2(psrad),
2945     [8 + 6] = MMX_OP2(pslld),
2946     [16 + 2] = MMX_OP2(psrlq),
2947     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2948     [16 + 6] = MMX_OP2(psllq),
2949     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2950 };
2951 
2952 static const SSEFunc_0_epi sse_op_table3ai[] = {
2953     gen_helper_cvtsi2ss,
2954     gen_helper_cvtsi2sd
2955 };
2956 
2957 #ifdef TARGET_X86_64
2958 static const SSEFunc_0_epl sse_op_table3aq[] = {
2959     gen_helper_cvtsq2ss,
2960     gen_helper_cvtsq2sd
2961 };
2962 #endif
2963 
2964 static const SSEFunc_i_ep sse_op_table3bi[] = {
2965     gen_helper_cvttss2si,
2966     gen_helper_cvtss2si,
2967     gen_helper_cvttsd2si,
2968     gen_helper_cvtsd2si
2969 };
2970 
2971 #ifdef TARGET_X86_64
2972 static const SSEFunc_l_ep sse_op_table3bq[] = {
2973     gen_helper_cvttss2sq,
2974     gen_helper_cvtss2sq,
2975     gen_helper_cvttsd2sq,
2976     gen_helper_cvtsd2sq
2977 };
2978 #endif
2979 
2980 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2981     SSE_FOP(cmpeq),
2982     SSE_FOP(cmplt),
2983     SSE_FOP(cmple),
2984     SSE_FOP(cmpunord),
2985     SSE_FOP(cmpneq),
2986     SSE_FOP(cmpnlt),
2987     SSE_FOP(cmpnle),
2988     SSE_FOP(cmpord),
2989 };
2990 
2991 static const SSEFunc_0_epp sse_op_table5[256] = {
2992     [0x0c] = gen_helper_pi2fw,
2993     [0x0d] = gen_helper_pi2fd,
2994     [0x1c] = gen_helper_pf2iw,
2995     [0x1d] = gen_helper_pf2id,
2996     [0x8a] = gen_helper_pfnacc,
2997     [0x8e] = gen_helper_pfpnacc,
2998     [0x90] = gen_helper_pfcmpge,
2999     [0x94] = gen_helper_pfmin,
3000     [0x96] = gen_helper_pfrcp,
3001     [0x97] = gen_helper_pfrsqrt,
3002     [0x9a] = gen_helper_pfsub,
3003     [0x9e] = gen_helper_pfadd,
3004     [0xa0] = gen_helper_pfcmpgt,
3005     [0xa4] = gen_helper_pfmax,
3006     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3007     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3008     [0xaa] = gen_helper_pfsubr,
3009     [0xae] = gen_helper_pfacc,
3010     [0xb0] = gen_helper_pfcmpeq,
3011     [0xb4] = gen_helper_pfmul,
3012     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3013     [0xb7] = gen_helper_pmulhrw_mmx,
3014     [0xbb] = gen_helper_pswapd,
3015     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3016 };
3017 
3018 struct SSEOpHelper_epp {
3019     SSEFunc_0_epp op[2];
3020     uint32_t ext_mask;
3021 };
3022 
3023 struct SSEOpHelper_eppi {
3024     SSEFunc_0_eppi op[2];
3025     uint32_t ext_mask;
3026 };
3027 
3028 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3029 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3030 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3031 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3032 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3033         CPUID_EXT_PCLMULQDQ }
3034 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3035 
3036 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3037     [0x00] = SSSE3_OP(pshufb),
3038     [0x01] = SSSE3_OP(phaddw),
3039     [0x02] = SSSE3_OP(phaddd),
3040     [0x03] = SSSE3_OP(phaddsw),
3041     [0x04] = SSSE3_OP(pmaddubsw),
3042     [0x05] = SSSE3_OP(phsubw),
3043     [0x06] = SSSE3_OP(phsubd),
3044     [0x07] = SSSE3_OP(phsubsw),
3045     [0x08] = SSSE3_OP(psignb),
3046     [0x09] = SSSE3_OP(psignw),
3047     [0x0a] = SSSE3_OP(psignd),
3048     [0x0b] = SSSE3_OP(pmulhrsw),
3049     [0x10] = SSE41_OP(pblendvb),
3050     [0x14] = SSE41_OP(blendvps),
3051     [0x15] = SSE41_OP(blendvpd),
3052     [0x17] = SSE41_OP(ptest),
3053     [0x1c] = SSSE3_OP(pabsb),
3054     [0x1d] = SSSE3_OP(pabsw),
3055     [0x1e] = SSSE3_OP(pabsd),
3056     [0x20] = SSE41_OP(pmovsxbw),
3057     [0x21] = SSE41_OP(pmovsxbd),
3058     [0x22] = SSE41_OP(pmovsxbq),
3059     [0x23] = SSE41_OP(pmovsxwd),
3060     [0x24] = SSE41_OP(pmovsxwq),
3061     [0x25] = SSE41_OP(pmovsxdq),
3062     [0x28] = SSE41_OP(pmuldq),
3063     [0x29] = SSE41_OP(pcmpeqq),
3064     [0x2a] = SSE41_SPECIAL, /* movntdqa */
3065     [0x2b] = SSE41_OP(packusdw),
3066     [0x30] = SSE41_OP(pmovzxbw),
3067     [0x31] = SSE41_OP(pmovzxbd),
3068     [0x32] = SSE41_OP(pmovzxbq),
3069     [0x33] = SSE41_OP(pmovzxwd),
3070     [0x34] = SSE41_OP(pmovzxwq),
3071     [0x35] = SSE41_OP(pmovzxdq),
3072     [0x37] = SSE42_OP(pcmpgtq),
3073     [0x38] = SSE41_OP(pminsb),
3074     [0x39] = SSE41_OP(pminsd),
3075     [0x3a] = SSE41_OP(pminuw),
3076     [0x3b] = SSE41_OP(pminud),
3077     [0x3c] = SSE41_OP(pmaxsb),
3078     [0x3d] = SSE41_OP(pmaxsd),
3079     [0x3e] = SSE41_OP(pmaxuw),
3080     [0x3f] = SSE41_OP(pmaxud),
3081     [0x40] = SSE41_OP(pmulld),
3082     [0x41] = SSE41_OP(phminposuw),
3083     [0xdb] = AESNI_OP(aesimc),
3084     [0xdc] = AESNI_OP(aesenc),
3085     [0xdd] = AESNI_OP(aesenclast),
3086     [0xde] = AESNI_OP(aesdec),
3087     [0xdf] = AESNI_OP(aesdeclast),
3088 };
3089 
3090 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3091     [0x08] = SSE41_OP(roundps),
3092     [0x09] = SSE41_OP(roundpd),
3093     [0x0a] = SSE41_OP(roundss),
3094     [0x0b] = SSE41_OP(roundsd),
3095     [0x0c] = SSE41_OP(blendps),
3096     [0x0d] = SSE41_OP(blendpd),
3097     [0x0e] = SSE41_OP(pblendw),
3098     [0x0f] = SSSE3_OP(palignr),
3099     [0x14] = SSE41_SPECIAL, /* pextrb */
3100     [0x15] = SSE41_SPECIAL, /* pextrw */
3101     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3102     [0x17] = SSE41_SPECIAL, /* extractps */
3103     [0x20] = SSE41_SPECIAL, /* pinsrb */
3104     [0x21] = SSE41_SPECIAL, /* insertps */
3105     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3106     [0x40] = SSE41_OP(dpps),
3107     [0x41] = SSE41_OP(dppd),
3108     [0x42] = SSE41_OP(mpsadbw),
3109     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3110     [0x60] = SSE42_OP(pcmpestrm),
3111     [0x61] = SSE42_OP(pcmpestri),
3112     [0x62] = SSE42_OP(pcmpistrm),
3113     [0x63] = SSE42_OP(pcmpistri),
3114     [0xdf] = AESNI_OP(aeskeygenassist),
3115 };
3116 
3117 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3118                     target_ulong pc_start)
3119 {
3120     int b1, op1_offset, op2_offset, is_xmm, val;
3121     int modrm, mod, rm, reg;
3122     SSEFunc_0_epp sse_fn_epp;
3123     SSEFunc_0_eppi sse_fn_eppi;
3124     SSEFunc_0_ppi sse_fn_ppi;
3125     SSEFunc_0_eppt sse_fn_eppt;
3126     MemOp ot;
3127 
3128     b &= 0xff;
3129     if (s->prefix & PREFIX_DATA)
3130         b1 = 1;
3131     else if (s->prefix & PREFIX_REPZ)
3132         b1 = 2;
3133     else if (s->prefix & PREFIX_REPNZ)
3134         b1 = 3;
3135     else
3136         b1 = 0;
3137     sse_fn_epp = sse_op_table1[b][b1];
3138     if (!sse_fn_epp) {
3139         goto unknown_op;
3140     }
3141     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3142         is_xmm = 1;
3143     } else {
3144         if (b1 == 0) {
3145             /* MMX case */
3146             is_xmm = 0;
3147         } else {
3148             is_xmm = 1;
3149         }
3150     }
3151     /* simple MMX/SSE operation */
3152     if (s->flags & HF_TS_MASK) {
3153         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3154         return;
3155     }
3156     if (s->flags & HF_EM_MASK) {
3157     illegal_op:
3158         gen_illegal_opcode(s);
3159         return;
3160     }
3161     if (is_xmm
3162         && !(s->flags & HF_OSFXSR_MASK)
3163         && (b != 0x38 && b != 0x3a)) {
3164         goto unknown_op;
3165     }
3166     if (b == 0x0e) {
3167         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3168             /* If we were fully decoding this we might use illegal_op.  */
3169             goto unknown_op;
3170         }
3171         /* femms */
3172         gen_helper_emms(cpu_env);
3173         return;
3174     }
3175     if (b == 0x77) {
3176         /* emms */
3177         gen_helper_emms(cpu_env);
3178         return;
3179     }
3180     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3181        the static cpu state) */
3182     if (!is_xmm) {
3183         gen_helper_enter_mmx(cpu_env);
3184     }
3185 
3186     modrm = x86_ldub_code(env, s);
3187     reg = ((modrm >> 3) & 7);
3188     if (is_xmm) {
3189         reg |= REX_R(s);
3190     }
3191     mod = (modrm >> 6) & 3;
3192     if (sse_fn_epp == SSE_SPECIAL) {
3193         b |= (b1 << 8);
3194         switch(b) {
3195         case 0x0e7: /* movntq */
3196             if (mod == 3) {
3197                 goto illegal_op;
3198             }
3199             gen_lea_modrm(env, s, modrm);
3200             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3201             break;
3202         case 0x1e7: /* movntdq */
3203         case 0x02b: /* movntps */
3204         case 0x12b: /* movntpd */
3205             if (mod == 3)
3206                 goto illegal_op;
3207             gen_lea_modrm(env, s, modrm);
3208             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3209             break;
3210         case 0x3f0: /* lddqu */
3211             if (mod == 3)
3212                 goto illegal_op;
3213             gen_lea_modrm(env, s, modrm);
3214             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3215             break;
3216         case 0x22b: /* movntss */
3217         case 0x32b: /* movntsd */
3218             if (mod == 3)
3219                 goto illegal_op;
3220             gen_lea_modrm(env, s, modrm);
3221             if (b1 & 1) {
3222                 gen_stq_env_A0(s, offsetof(CPUX86State,
3223                                            xmm_regs[reg].ZMM_Q(0)));
3224             } else {
3225                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3226                     xmm_regs[reg].ZMM_L(0)));
3227                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3228             }
3229             break;
3230         case 0x6e: /* movd mm, ea */
3231 #ifdef TARGET_X86_64
3232             if (s->dflag == MO_64) {
3233                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3234                 tcg_gen_st_tl(s->T0, cpu_env,
3235                               offsetof(CPUX86State, fpregs[reg].mmx));
3236             } else
3237 #endif
3238             {
3239                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3240                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3241                                  offsetof(CPUX86State,fpregs[reg].mmx));
3242                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3243                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3244             }
3245             break;
3246         case 0x16e: /* movd xmm, ea */
3247 #ifdef TARGET_X86_64
3248             if (s->dflag == MO_64) {
3249                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3250                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3251                                  offsetof(CPUX86State,xmm_regs[reg]));
3252                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3253             } else
3254 #endif
3255             {
3256                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3257                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3258                                  offsetof(CPUX86State,xmm_regs[reg]));
3259                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3260                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3261             }
3262             break;
3263         case 0x6f: /* movq mm, ea */
3264             if (mod != 3) {
3265                 gen_lea_modrm(env, s, modrm);
3266                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3267             } else {
3268                 rm = (modrm & 7);
3269                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3270                                offsetof(CPUX86State,fpregs[rm].mmx));
3271                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3272                                offsetof(CPUX86State,fpregs[reg].mmx));
3273             }
3274             break;
3275         case 0x010: /* movups */
3276         case 0x110: /* movupd */
3277         case 0x028: /* movaps */
3278         case 0x128: /* movapd */
3279         case 0x16f: /* movdqa xmm, ea */
3280         case 0x26f: /* movdqu xmm, ea */
3281             if (mod != 3) {
3282                 gen_lea_modrm(env, s, modrm);
3283                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3284             } else {
3285                 rm = (modrm & 7) | REX_B(s);
3286                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3287                             offsetof(CPUX86State,xmm_regs[rm]));
3288             }
3289             break;
3290         case 0x210: /* movss xmm, ea */
3291             if (mod != 3) {
3292                 gen_lea_modrm(env, s, modrm);
3293                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3294                 tcg_gen_st32_tl(s->T0, cpu_env,
3295                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3296                 tcg_gen_movi_tl(s->T0, 0);
3297                 tcg_gen_st32_tl(s->T0, cpu_env,
3298                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3299                 tcg_gen_st32_tl(s->T0, cpu_env,
3300                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3301                 tcg_gen_st32_tl(s->T0, cpu_env,
3302                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3303             } else {
3304                 rm = (modrm & 7) | REX_B(s);
3305                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3306                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3307             }
3308             break;
3309         case 0x310: /* movsd xmm, ea */
3310             if (mod != 3) {
3311                 gen_lea_modrm(env, s, modrm);
3312                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3313                                            xmm_regs[reg].ZMM_Q(0)));
3314                 tcg_gen_movi_tl(s->T0, 0);
3315                 tcg_gen_st32_tl(s->T0, cpu_env,
3316                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3317                 tcg_gen_st32_tl(s->T0, cpu_env,
3318                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3319             } else {
3320                 rm = (modrm & 7) | REX_B(s);
3321                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3322                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3323             }
3324             break;
3325         case 0x012: /* movlps */
3326         case 0x112: /* movlpd */
3327             if (mod != 3) {
3328                 gen_lea_modrm(env, s, modrm);
3329                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3330                                            xmm_regs[reg].ZMM_Q(0)));
3331             } else {
3332                 /* movhlps */
3333                 rm = (modrm & 7) | REX_B(s);
3334                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3335                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3336             }
3337             break;
3338         case 0x212: /* movsldup */
3339             if (mod != 3) {
3340                 gen_lea_modrm(env, s, modrm);
3341                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3342             } else {
3343                 rm = (modrm & 7) | REX_B(s);
3344                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3345                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3346                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3347                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3348             }
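                 /* For both forms, duplicate the even-numbered elements
                    into the odd slots, per movsldup. */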
3349             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3350                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3351             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3352                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3353             break;
3354         case 0x312: /* movddup */
3355             if (mod != 3) {
3356                 gen_lea_modrm(env, s, modrm);
3357                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3358                                            xmm_regs[reg].ZMM_Q(0)));
3359             } else {
3360                 rm = (modrm & 7) | REX_B(s);
3361                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3362                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3363             }
3364             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3365                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3366             break;
3367         case 0x016: /* movhps */
3368         case 0x116: /* movhpd */
3369             if (mod != 3) {
3370                 gen_lea_modrm(env, s, modrm);
3371                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3372                                            xmm_regs[reg].ZMM_Q(1)));
3373             } else {
3374                 /* movlhps */
3375                 rm = (modrm & 7) | REX_B(s);
3376                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3377                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3378             }
3379             break;
3380         case 0x216: /* movshdup */
3381             if (mod != 3) {
3382                 gen_lea_modrm(env, s, modrm);
3383                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3384             } else {
3385                 rm = (modrm & 7) | REX_B(s);
3386                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3387                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3388                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3389                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3390             }
3391             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3392                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3393             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3394                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3395             break;
3396         case 0x178:
3397         case 0x378:
3398             {
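                     /* SSE4a: 0x178 (66 0f 78) is extrq and 0x378 (f2 0f 78)
                        is insertq, both in their immediate forms. */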
3399                 int bit_index, field_length;
3400 
3401                 if (b1 == 1 && reg != 0) {
3402                     goto illegal_op;
                     }
3403                 field_length = x86_ldub_code(env, s) & 0x3F;
3404                 bit_index = x86_ldub_code(env, s) & 0x3F;
3405                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3406                     offsetof(CPUX86State,xmm_regs[reg]));
3407                 if (b1 == 1) {
3408                     gen_helper_extrq_i(cpu_env, s->ptr0,
3409                                        tcg_const_i32(bit_index),
3410                                        tcg_const_i32(field_length));
3411                 } else {
3412                     gen_helper_insertq_i(cpu_env, s->ptr0,
3413                                          tcg_const_i32(bit_index),
3414                                          tcg_const_i32(field_length));
                     }
3415             }
3416             break;
3417         case 0x7e: /* movd ea, mm */
3418 #ifdef TARGET_X86_64
3419             if (s->dflag == MO_64) {
3420                 tcg_gen_ld_i64(s->T0, cpu_env,
3421                                offsetof(CPUX86State,fpregs[reg].mmx));
3422                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3423             } else
3424 #endif
3425             {
3426                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3427                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3428                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3429             }
3430             break;
3431         case 0x17e: /* movd ea, xmm */
3432 #ifdef TARGET_X86_64
3433             if (s->dflag == MO_64) {
3434                 tcg_gen_ld_i64(s->T0, cpu_env,
3435                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3436                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3437             } else
3438 #endif
3439             {
3440                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3441                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3442                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3443             }
3444             break;
3445         case 0x27e: /* movq xmm, ea */
3446             if (mod != 3) {
3447                 gen_lea_modrm(env, s, modrm);
3448                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3449                                            xmm_regs[reg].ZMM_Q(0)));
3450             } else {
3451                 rm = (modrm & 7) | REX_B(s);
3452                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3453                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3454             }
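                 /* movq zero-extends: clear the high quadword of the
                    destination. */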
3455             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3456             break;
3457         case 0x7f: /* movq ea, mm */
3458             if (mod != 3) {
3459                 gen_lea_modrm(env, s, modrm);
3460                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3461             } else {
3462                 rm = (modrm & 7);
3463                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3464                             offsetof(CPUX86State,fpregs[reg].mmx));
3465             }
3466             break;
3467         case 0x011: /* movups */
3468         case 0x111: /* movupd */
3469         case 0x029: /* movaps */
3470         case 0x129: /* movapd */
3471         case 0x17f: /* movdqa ea, xmm */
3472         case 0x27f: /* movdqu ea, xmm */
3473             if (mod != 3) {
3474                 gen_lea_modrm(env, s, modrm);
3475                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3476             } else {
3477                 rm = (modrm & 7) | REX_B(s);
3478                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3479                             offsetof(CPUX86State,xmm_regs[reg]));
3480             }
3481             break;
3482         case 0x211: /* movss ea, xmm */
3483             if (mod != 3) {
3484                 gen_lea_modrm(env, s, modrm);
3485                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3486                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3487                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3488             } else {
3489                 rm = (modrm & 7) | REX_B(s);
3490                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3491                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3492             }
3493             break;
3494         case 0x311: /* movsd ea, xmm */
3495             if (mod != 3) {
3496                 gen_lea_modrm(env, s, modrm);
3497                 gen_stq_env_A0(s, offsetof(CPUX86State,
3498                                            xmm_regs[reg].ZMM_Q(0)));
3499             } else {
3500                 rm = (modrm & 7) | REX_B(s);
3501                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3502                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3503             }
3504             break;
3505         case 0x013: /* movlps */
3506         case 0x113: /* movlpd */
3507             if (mod != 3) {
3508                 gen_lea_modrm(env, s, modrm);
3509                 gen_stq_env_A0(s, offsetof(CPUX86State,
3510                                            xmm_regs[reg].ZMM_Q(0)));
3511             } else {
3512                 goto illegal_op;
3513             }
3514             break;
3515         case 0x017: /* movhps */
3516         case 0x117: /* movhpd */
3517             if (mod != 3) {
3518                 gen_lea_modrm(env, s, modrm);
3519                 gen_stq_env_A0(s, offsetof(CPUX86State,
3520                                            xmm_regs[reg].ZMM_Q(1)));
3521             } else {
3522                 goto illegal_op;
3523             }
3524             break;
3525         case 0x71: /* shift mm, im */
3526         case 0x72:
3527         case 0x73:
3528         case 0x171: /* shift xmm, im */
3529         case 0x172:
3530         case 0x173:
3531             if (b1 >= 2) {
3532                 goto unknown_op;
3533             }
3534             val = x86_ldub_code(env, s);
3535             if (is_xmm) {
3536                 tcg_gen_movi_tl(s->T0, val);
3537                 tcg_gen_st32_tl(s->T0, cpu_env,
3538                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3539                 tcg_gen_movi_tl(s->T0, 0);
3540                 tcg_gen_st32_tl(s->T0, cpu_env,
3541                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3542                 op1_offset = offsetof(CPUX86State,xmm_t0);
3543             } else {
3544                 tcg_gen_movi_tl(s->T0, val);
3545                 tcg_gen_st32_tl(s->T0, cpu_env,
3546                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3547                 tcg_gen_movi_tl(s->T0, 0);
3548                 tcg_gen_st32_tl(s->T0, cpu_env,
3549                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3550                 op1_offset = offsetof(CPUX86State,mmx_t0);
3551             }
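                 /* sse_op_table2 has a row of eight entries per opcode
                    (0x71/0x72/0x73), indexed by the modrm reg field; the
                    final index selects the MMX or the 66-prefixed variant. */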
3552             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3553                                        ((modrm >> 3) & 7)][b1];
3554             if (!sse_fn_epp) {
3555                 goto unknown_op;
3556             }
3557             if (is_xmm) {
3558                 rm = (modrm & 7) | REX_B(s);
3559                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3560             } else {
3561                 rm = (modrm & 7);
3562                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3563             }
3564             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3565             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3566             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3567             break;
3568         case 0x050: /* movmskps */
3569             rm = (modrm & 7) | REX_B(s);
3570             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3571                              offsetof(CPUX86State,xmm_regs[rm]));
3572             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3573             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3574             break;
3575         case 0x150: /* movmskpd */
3576             rm = (modrm & 7) | REX_B(s);
3577             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3578                              offsetof(CPUX86State,xmm_regs[rm]));
3579             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3580             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3581             break;
3582         case 0x02a: /* cvtpi2ps */
3583         case 0x12a: /* cvtpi2pd */
3584             gen_helper_enter_mmx(cpu_env);
3585             if (mod != 3) {
3586                 gen_lea_modrm(env, s, modrm);
3587                 op2_offset = offsetof(CPUX86State,mmx_t0);
3588                 gen_ldq_env_A0(s, op2_offset);
3589             } else {
3590                 rm = (modrm & 7);
3591                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3592             }
3593             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3594             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3595             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3596             switch (b >> 8) {
3597             case 0x0:
3598                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3599                 break;
3600             default:
3601             case 0x1:
3602                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3603                 break;
3604             }
3605             break;
3606         case 0x22a: /* cvtsi2ss */
3607         case 0x32a: /* cvtsi2sd */
3608             ot = mo_64_32(s->dflag);
3609             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3610             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3611             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3612             if (ot == MO_32) {
3613                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3614                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3615                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3616             } else {
3617 #ifdef TARGET_X86_64
3618                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3619                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3620 #else
3621                 goto illegal_op;
3622 #endif
3623             }
3624             break;
3625         case 0x02c: /* cvttps2pi */
3626         case 0x12c: /* cvttpd2pi */
3627         case 0x02d: /* cvtps2pi */
3628         case 0x12d: /* cvtpd2pi */
3629             gen_helper_enter_mmx(cpu_env);
3630             if (mod != 3) {
3631                 gen_lea_modrm(env, s, modrm);
3632                 op2_offset = offsetof(CPUX86State,xmm_t0);
3633                 gen_ldo_env_A0(s, op2_offset);
3634             } else {
3635                 rm = (modrm & 7) | REX_B(s);
3636                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3637             }
3638             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3639             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3640             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3641             switch (b) {
3642             case 0x02c:
3643                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3644                 break;
3645             case 0x12c:
3646                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3647                 break;
3648             case 0x02d:
3649                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3650                 break;
3651             case 0x12d:
3652                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3653                 break;
3654             }
3655             break;
3656         case 0x22c: /* cvttss2si */
3657         case 0x32c: /* cvttsd2si */
3658         case 0x22d: /* cvtss2si */
3659         case 0x32d: /* cvtsd2si */
3660             ot = mo_64_32(s->dflag);
3661             if (mod != 3) {
3662                 gen_lea_modrm(env, s, modrm);
3663                 if ((b >> 8) & 1) {
3664                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3665                 } else {
3666                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3667                     tcg_gen_st32_tl(s->T0, cpu_env,
3668                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3669                 }
3670                 op2_offset = offsetof(CPUX86State,xmm_t0);
3671             } else {
3672                 rm = (modrm & 7) | REX_B(s);
3673                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3674             }
3675             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3676             if (ot == MO_32) {
3677                 SSEFunc_i_ep sse_fn_i_ep =
3678                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3679                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3680                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3681             } else {
3682 #ifdef TARGET_X86_64
3683                 SSEFunc_l_ep sse_fn_l_ep =
3684                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3685                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3686 #else
3687                 goto illegal_op;
3688 #endif
3689             }
3690             gen_op_mov_reg_v(s, ot, reg, s->T0);
3691             break;
3692         case 0xc4: /* pinsrw */
3693         case 0x1c4:
3694             s->rip_offset = 1;
3695             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3696             val = x86_ldub_code(env, s);
3697             if (b1) {
3698                 val &= 7;
3699                 tcg_gen_st16_tl(s->T0, cpu_env,
3700                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3701             } else {
3702                 val &= 3;
3703                 tcg_gen_st16_tl(s->T0, cpu_env,
3704                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3705             }
3706             break;
3707         case 0xc5: /* pextrw */
3708         case 0x1c5:
3709             if (mod != 3) {
3710                 goto illegal_op;
                 }
3711             ot = mo_64_32(s->dflag);
3712             val = x86_ldub_code(env, s);
3713             if (b1) {
3714                 val &= 7;
3715                 rm = (modrm & 7) | REX_B(s);
3716                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3717                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3718             } else {
3719                 val &= 3;
3720                 rm = (modrm & 7);
3721                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3722                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3723             }
3724             reg = ((modrm >> 3) & 7) | REX_R(s);
3725             gen_op_mov_reg_v(s, ot, reg, s->T0);
3726             break;
3727         case 0x1d6: /* movq ea, xmm */
3728             if (mod != 3) {
3729                 gen_lea_modrm(env, s, modrm);
3730                 gen_stq_env_A0(s, offsetof(CPUX86State,
3731                                            xmm_regs[reg].ZMM_Q(0)));
3732             } else {
3733                 rm = (modrm & 7) | REX_B(s);
3734                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3735                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3736                 gen_op_movq_env_0(s,
3737                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3738             }
3739             break;
3740         case 0x2d6: /* movq2dq */
3741             gen_helper_enter_mmx(cpu_env);
3742             rm = (modrm & 7);
3743             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3744                         offsetof(CPUX86State,fpregs[rm].mmx));
3745             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3746             break;
3747         case 0x3d6: /* movdq2q */
3748             gen_helper_enter_mmx(cpu_env);
3749             rm = (modrm & 7) | REX_B(s);
3750             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3751                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3752             break;
3753         case 0xd7: /* pmovmskb */
3754         case 0x1d7:
3755             if (mod != 3) {
3756                 goto illegal_op;
                 }
3757             if (b1) {
3758                 rm = (modrm & 7) | REX_B(s);
3759                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3760                                  offsetof(CPUX86State, xmm_regs[rm]));
3761                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3762             } else {
3763                 rm = (modrm & 7);
3764                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3765                                  offsetof(CPUX86State, fpregs[rm].mmx));
3766                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3767             }
3768             reg = ((modrm >> 3) & 7) | REX_R(s);
3769             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3770             break;
3771 
3772         case 0x138:
3773         case 0x038:
3774             b = modrm;
3775             if ((b & 0xf0) == 0xf0) {
3776                 goto do_0f_38_fx;
3777             }
3778             modrm = x86_ldub_code(env, s);
3779             rm = modrm & 7;
3780             reg = ((modrm >> 3) & 7) | REX_R(s);
3781             mod = (modrm >> 6) & 3;
3782             if (b1 >= 2) {
3783                 goto unknown_op;
3784             }
3785 
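                 /* sse_op_table6 maps the 0f 38 opcode byte to its helper,
                    with one column per mandatory prefix (b1). */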
3786             sse_fn_epp = sse_op_table6[b].op[b1];
3787             if (!sse_fn_epp) {
3788                 goto unknown_op;
3789             }
3790             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
3791                 goto illegal_op;
                 }
3792 
3793             if (b1) {
3794                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3795                 if (mod == 3) {
3796                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3797                 } else {
3798                     op2_offset = offsetof(CPUX86State,xmm_t0);
3799                     gen_lea_modrm(env, s, modrm);
3800                     switch (b) {
3801                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3802                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3803                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3804                         gen_ldq_env_A0(s, op2_offset +
3805                                         offsetof(ZMMReg, ZMM_Q(0)));
3806                         break;
3807                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3808                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3809                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3810                                             s->mem_index, MO_LEUL);
3811                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3812                                         offsetof(ZMMReg, ZMM_L(0)));
3813                         break;
3814                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3815                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3816                                            s->mem_index, MO_LEUW);
3817                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3818                                         offsetof(ZMMReg, ZMM_W(0)));
3819                         break;
3820                     case 0x2a:            /* movntdqa */
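                             /* movntdqa is a straight 128-bit load into the
                                destination; there is no helper to call, so
                                finish here. */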
3821                         gen_ldo_env_A0(s, op1_offset);
3822                         return;
3823                     default:
3824                         gen_ldo_env_A0(s, op2_offset);
3825                     }
3826                 }
3827             } else {
3828                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3829                 if (mod == 3) {
3830                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3831                 } else {
3832                     op2_offset = offsetof(CPUX86State,mmx_t0);
3833                     gen_lea_modrm(env, s, modrm);
3834                     gen_ldq_env_A0(s, op2_offset);
3835                 }
3836             }
3837             if (sse_fn_epp == SSE_SPECIAL) {
3838                 goto unknown_op;
3839             }
3840 
3841             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3842             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3843             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3844 
3845             if (b == 0x17) {
3846                 set_cc_op(s, CC_OP_EFLAGS);
3847             }
3848             break;
3849 
3850         case 0x238:
3851         case 0x338:
3852         do_0f_38_fx:
3853             /* Various integer extensions at 0f 38 f[0-f].  */
3854             b = modrm | (b1 << 8);
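                 /* As above, b becomes 0xPBB: prefix index in the high
                    bits, 0f 38 fx opcode byte in the low bits. */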
3855             modrm = x86_ldub_code(env, s);
3856             reg = ((modrm >> 3) & 7) | REX_R(s);
3857 
3858             switch (b) {
3859             case 0x3f0: /* crc32 Gd,Eb */
3860             case 0x3f1: /* crc32 Gd,Ey */
3861             do_crc32:
3862                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3863                     goto illegal_op;
3864                 }
3865                 if ((b & 0xff) == 0xf0) {
3866                     ot = MO_8;
3867                 } else if (s->dflag != MO_64) {
3868                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3869                 } else {
3870                     ot = MO_64;
3871                 }
3872 
3873                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3874                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3875                 gen_helper_crc32(s->T0, s->tmp2_i32,
3876                                  s->T0, tcg_const_i32(8 << ot));
3877 
3878                 ot = mo_64_32(s->dflag);
3879                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3880                 break;
3881 
3882             case 0x1f0: /* crc32 or movbe */
3883             case 0x1f1:
3884                 /* For these insns, the f3 prefix is supposed to have
3885                    priority over the 66 prefix, but that is not reflected
3886                    in how b1 was computed above.  */
3887                 if (s->prefix & PREFIX_REPNZ) {
3888                     goto do_crc32;
3889                 }
3890                 /* FALLTHRU */
3891             case 0x0f0: /* movbe Gy,My */
3892             case 0x0f1: /* movbe My,Gy */
3893                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3894                     goto illegal_op;
3895                 }
3896                 if (s->dflag != MO_64) {
3897                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3898                 } else {
3899                     ot = MO_64;
3900                 }
3901 
3902                 gen_lea_modrm(env, s, modrm);
3903                 if ((b & 1) == 0) {
3904                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3905                                        s->mem_index, ot | MO_BE);
3906                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3907                 } else {
3908                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3909                                        s->mem_index, ot | MO_BE);
3910                 }
3911                 break;
3912 
3913             case 0x0f2: /* andn Gy, By, Ey */
3914                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3915                     || !(s->prefix & PREFIX_VEX)
3916                     || s->vex_l != 0) {
3917                     goto illegal_op;
3918                 }
3919                 ot = mo_64_32(s->dflag);
3920                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3921                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3922                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3923                 gen_op_update1_cc(s);
3924                 set_cc_op(s, CC_OP_LOGICB + ot);
3925                 break;
3926 
3927             case 0x0f7: /* bextr Gy, Ey, By */
3928                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3929                     || !(s->prefix & PREFIX_VEX)
3930                     || s->vex_l != 0) {
3931                     goto illegal_op;
3932                 }
3933                 ot = mo_64_32(s->dflag);
3934                 {
3935                     TCGv bound, zero;
3936 
3937                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3938                     /* Extract START, and shift the operand.
3939                        Shifts larger than operand size get zeros.  */
3940                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3941                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3942 
3943                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3944                     zero = tcg_const_tl(0);
3945                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3946                                        s->T0, zero);
3947                     tcg_temp_free(zero);
3948 
3949                     /* Extract the LEN into a mask.  Lengths larger than
3950                        operand size get all ones.  */
3951                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3952                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3953                                        s->A0, bound);
3954                     tcg_temp_free(bound);
3955                     tcg_gen_movi_tl(s->T1, 1);
3956                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3957                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3958                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3959 
3960                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3961                     gen_op_update1_cc(s);
3962                     set_cc_op(s, CC_OP_LOGICB + ot);
3963                 }
3964                 break;
3965 
3966             case 0x0f5: /* bzhi Gy, Ey, By */
3967                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3968                     || !(s->prefix & PREFIX_VEX)
3969                     || s->vex_l != 0) {
3970                     goto illegal_op;
3971                 }
3972                 ot = mo_64_32(s->dflag);
3973                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3974                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3975                 {
3976                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3977                     /* Note that since we're using BMILG (in order to get O
3978                        cleared) we need to store the inverse into C.  */
3979                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3980                                        s->T1, bound);
3981                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3982                                        bound, bound, s->T1);
3983                     tcg_temp_free(bound);
3984                 }
3985                 tcg_gen_movi_tl(s->A0, -1);
3986                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3987                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3988                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3989                 gen_op_update1_cc(s);
3990                 set_cc_op(s, CC_OP_BMILGB + ot);
3991                 break;
3992 
3993             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3994                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3995                     || !(s->prefix & PREFIX_VEX)
3996                     || s->vex_l != 0) {
3997                     goto illegal_op;
3998                 }
3999                 ot = mo_64_32(s->dflag);
4000                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4001                 switch (ot) {
4002                 default:
4003                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4004                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
4005                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4006                                       s->tmp2_i32, s->tmp3_i32);
4007                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4008                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4009                     break;
4010 #ifdef TARGET_X86_64
4011                 case MO_64:
4012                     tcg_gen_mulu2_i64(s->T0, s->T1,
4013                                       s->T0, cpu_regs[R_EDX]);
4014                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4015                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4016                     break;
4017 #endif
4018                 }
4019                 break;
4020 
4021             case 0x3f5: /* pdep Gy, By, Ey */
4022                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4023                     || !(s->prefix & PREFIX_VEX)
4024                     || s->vex_l != 0) {
4025                     goto illegal_op;
4026                 }
4027                 ot = mo_64_32(s->dflag);
4028                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4029                 /* Note that by zero-extending the source operand, we
4030                    automatically handle zero-extending the result.  */
4031                 if (ot == MO_64) {
4032                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4033                 } else {
4034                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4035                 }
4036                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4037                 break;
4038 
4039             case 0x2f5: /* pext Gy, By, Ey */
4040                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4041                     || !(s->prefix & PREFIX_VEX)
4042                     || s->vex_l != 0) {
4043                     goto illegal_op;
4044                 }
4045                 ot = mo_64_32(s->dflag);
4046                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4047                 /* Note that by zero-extending the source operand, we
4048                    automatically handle zero-extending the result.  */
4049                 if (ot == MO_64) {
4050                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4051                 } else {
4052                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4053                 }
4054                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4055                 break;
4056 
4057             case 0x1f6: /* adcx Gy, Ey */
4058             case 0x2f6: /* adox Gy, Ey */
4059                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4060                     goto illegal_op;
4061                 } else {
4062                     TCGv carry_in, carry_out, zero;
4063                     int end_op;
4064 
4065                     ot = mo_64_32(s->dflag);
4066                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4067 
4068                     /* Re-use the carry-out from a previous round.  */
4069                     carry_in = NULL;
4070                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4071                     switch (s->cc_op) {
4072                     case CC_OP_ADCX:
4073                         if (b == 0x1f6) {
4074                             carry_in = cpu_cc_dst;
4075                             end_op = CC_OP_ADCX;
4076                         } else {
4077                             end_op = CC_OP_ADCOX;
4078                         }
4079                         break;
4080                     case CC_OP_ADOX:
4081                         if (b == 0x1f6) {
4082                             end_op = CC_OP_ADCOX;
4083                         } else {
4084                             carry_in = cpu_cc_src2;
4085                             end_op = CC_OP_ADOX;
4086                         }
4087                         break;
4088                     case CC_OP_ADCOX:
4089                         end_op = CC_OP_ADCOX;
4090                         carry_in = carry_out;
4091                         break;
4092                     default:
4093                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4094                         break;
4095                     }
4096                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4097                     if (!carry_in) {
4098                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4099                             gen_compute_eflags(s);
4100                         }
4101                         carry_in = s->tmp0;
4102                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4103                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4104                     }
4105 
4106                     switch (ot) {
4107 #ifdef TARGET_X86_64
4108                     case MO_32:
4109                         /* If we know TL is 64-bit, and we want a 32-bit
4110                            result, just do everything in 64-bit arithmetic.  */
4111                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4112                         tcg_gen_ext32u_i64(s->T0, s->T0);
4113                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4114                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4115                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4116                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4117                         break;
4118 #endif
4119                     default:
4120                         /* Otherwise compute the carry-out in two steps.  */
4121                         zero = tcg_const_tl(0);
4122                         tcg_gen_add2_tl(s->T0, carry_out,
4123                                         s->T0, zero,
4124                                         carry_in, zero);
4125                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4126                                         cpu_regs[reg], carry_out,
4127                                         s->T0, zero);
4128                         tcg_temp_free(zero);
4129                         break;
4130                     }
4131                     set_cc_op(s, end_op);
4132                 }
4133                 break;
4134 
4135             case 0x1f7: /* shlx Gy, Ey, By */
4136             case 0x2f7: /* sarx Gy, Ey, By */
4137             case 0x3f7: /* shrx Gy, Ey, By */
4138                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4139                     || !(s->prefix & PREFIX_VEX)
4140                     || s->vex_l != 0) {
4141                     goto illegal_op;
4142                 }
4143                 ot = mo_64_32(s->dflag);
4144                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4145                 if (ot == MO_64) {
4146                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4147                 } else {
4148                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4149                 }
4150                 if (b == 0x1f7) {
4151                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4152                 } else if (b == 0x2f7) {
4153                     if (ot != MO_64) {
4154                         tcg_gen_ext32s_tl(s->T0, s->T0);
4155                     }
4156                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4157                 } else {
4158                     if (ot != MO_64) {
4159                         tcg_gen_ext32u_tl(s->T0, s->T0);
4160                     }
4161                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4162                 }
4163                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4164                 break;
4165 
4166             case 0x0f3:
4167             case 0x1f3:
4168             case 0x2f3:
4169             case 0x3f3: /* Group 17 */
4170                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4171                     || !(s->prefix & PREFIX_VEX)
4172                     || s->vex_l != 0) {
4173                     goto illegal_op;
4174                 }
4175                 ot = mo_64_32(s->dflag);
4176                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4177 
4178                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4179                 switch (reg & 7) {
4180                 case 1: /* blsr By,Ey */
4181                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4182                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4183                     break;
4184                 case 2: /* blsmsk By,Ey */
4185                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4186                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4187                     break;
4188                 case 3: /* blsi By, Ey */
4189                     tcg_gen_neg_tl(s->T1, s->T0);
4190                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4191                     break;
4192                 default:
4193                     goto unknown_op;
4194                 }
4195                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4196                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4197                 set_cc_op(s, CC_OP_BMILGB + ot);
4198                 break;
4199 
4200             default:
4201                 goto unknown_op;
4202             }
4203             break;
4204 
4205         case 0x03a:
4206         case 0x13a:
4207             b = modrm;
4208             modrm = x86_ldub_code(env, s);
4209             rm = modrm & 7;
4210             reg = ((modrm >> 3) & 7) | REX_R(s);
4211             mod = (modrm >> 6) & 3;
4212             if (b1 >= 2) {
4213                 goto unknown_op;
4214             }
4215 
4216             sse_fn_eppi = sse_op_table7[b].op[b1];
4217             if (!sse_fn_eppi) {
4218                 goto unknown_op;
4219             }
4220             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4221                 goto illegal_op;
4222 
4223             s->rip_offset = 1;
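                 /* The immediate byte trails the modrm and displacement
                    bytes; rip_offset lets RIP-relative addressing account
                    for it. */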
4224 
4225             if (sse_fn_eppi == SSE_SPECIAL) {
4226                 ot = mo_64_32(s->dflag);
4227                 rm = (modrm & 7) | REX_B(s);
4228                 if (mod != 3) {
4229                     gen_lea_modrm(env, s, modrm);
                     }
4230                 reg = ((modrm >> 3) & 7) | REX_R(s);
4231                 val = x86_ldub_code(env, s);
4232                 switch (b) {
4233                 case 0x14: /* pextrb */
4234                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4235                                             xmm_regs[reg].ZMM_B(val & 15)));
4236                     if (mod == 3) {
4237                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4238                     } else {
4239                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4240                                            s->mem_index, MO_UB);
4241                     }
4242                     break;
4243                 case 0x15: /* pextrw */
4244                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4245                                             xmm_regs[reg].ZMM_W(val & 7)));
4246                     if (mod == 3) {
4247                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4248                     } else {
4249                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4250                                            s->mem_index, MO_LEUW);
4251                     }
4252                     break;
4253                 case 0x16:
4254                     if (ot == MO_32) { /* pextrd */
4255                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4256                                         offsetof(CPUX86State,
4257                                                 xmm_regs[reg].ZMM_L(val & 3)));
4258                         if (mod == 3) {
4259                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4260                         } else {
4261                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4262                                                 s->mem_index, MO_LEUL);
4263                         }
4264                     } else { /* pextrq */
4265 #ifdef TARGET_X86_64
4266                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4267                                         offsetof(CPUX86State,
4268                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4269                         if (mod == 3) {
4270                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4271                         } else {
4272                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4273                                                 s->mem_index, MO_LEQ);
4274                         }
4275 #else
4276                         goto illegal_op;
4277 #endif
4278                     }
4279                     break;
4280                 case 0x17: /* extractps */
4281                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4282                                             xmm_regs[reg].ZMM_L(val & 3)));
4283                     if (mod == 3) {
4284                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4285                     } else {
4286                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4287                                            s->mem_index, MO_LEUL);
4288                     }
4289                     break;
4290                 case 0x20: /* pinsrb */
4291                     if (mod == 3) {
4292                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4293                     } else {
4294                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4295                                            s->mem_index, MO_UB);
4296                     }
4297                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4298                                             xmm_regs[reg].ZMM_B(val & 15)));
4299                     break;
4300                 case 0x21: /* insertps */
4301                     if (mod == 3) {
4302                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4303                                         offsetof(CPUX86State,xmm_regs[rm]
4304                                                 .ZMM_L((val >> 6) & 3)));
4305                     } else {
4306                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4307                                             s->mem_index, MO_LEUL);
4308                     }
4309                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4310                                     offsetof(CPUX86State,xmm_regs[reg]
4311                                             .ZMM_L((val >> 4) & 3)));
4312                     if ((val >> 0) & 1) {
4313                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4314                                         cpu_env, offsetof(CPUX86State,
4315                                                 xmm_regs[reg].ZMM_L(0)));
                         }
4316                     if ((val >> 1) & 1) {
4317                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4318                                         cpu_env, offsetof(CPUX86State,
4319                                                 xmm_regs[reg].ZMM_L(1)));
                         }
4320                     if ((val >> 2) & 1) {
4321                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4322                                         cpu_env, offsetof(CPUX86State,
4323                                                 xmm_regs[reg].ZMM_L(2)));
                         }
4324                     if ((val >> 3) & 1) {
4325                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4326                                         cpu_env, offsetof(CPUX86State,
4327                                                 xmm_regs[reg].ZMM_L(3)));
                         }
4328                     break;
4329                 case 0x22:
4330                     if (ot == MO_32) { /* pinsrd */
4331                         if (mod == 3) {
4332                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4333                         } else {
4334                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4335                                                 s->mem_index, MO_LEUL);
4336                         }
4337                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4338                                         offsetof(CPUX86State,
4339                                                 xmm_regs[reg].ZMM_L(val & 3)));
4340                     } else { /* pinsrq */
4341 #ifdef TARGET_X86_64
4342                         if (mod == 3) {
4343                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4344                         } else {
4345                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4346                                                 s->mem_index, MO_LEQ);
4347                         }
4348                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4349                                         offsetof(CPUX86State,
4350                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4351 #else
4352                         goto illegal_op;
4353 #endif
4354                     }
4355                     break;
4356                 }
4357                 return;
4358             }
4359 
4360             if (b1) {
4361                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4362                 if (mod == 3) {
4363                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4364                 } else {
4365                     op2_offset = offsetof(CPUX86State,xmm_t0);
4366                     gen_lea_modrm(env, s, modrm);
4367                     gen_ldo_env_A0(s, op2_offset);
4368                 }
4369             } else {
4370                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4371                 if (mod == 3) {
4372                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4373                 } else {
4374                     op2_offset = offsetof(CPUX86State,mmx_t0);
4375                     gen_lea_modrm(env, s, modrm);
4376                     gen_ldq_env_A0(s, op2_offset);
4377                 }
4378             }
4379             val = x86_ldub_code(env, s);
4380 
4381             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4382                 set_cc_op(s, CC_OP_EFLAGS);
4383 
4384                 if (s->dflag == MO_64) {
4385                     /* The helper must use entire 64-bit gp registers */
4386                     val |= 1 << 8;
4387                 }
4388             }
4389 
4390             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4391             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4392             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4393             break;
4394 
4395         case 0x33a:
4396             /* Various integer extensions at 0f 3a f[0-f].  */
4397             b = modrm | (b1 << 8);
4398             modrm = x86_ldub_code(env, s);
4399             reg = ((modrm >> 3) & 7) | REX_R(s);
4400 
4401             switch (b) {
4402             case 0x3f0: /* rorx Gy,Ey, Ib */
4403                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4404                     || !(s->prefix & PREFIX_VEX)
4405                     || s->vex_l != 0) {
4406                     goto illegal_op;
4407                 }
4408                 ot = mo_64_32(s->dflag);
4409                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4410                 b = x86_ldub_code(env, s);
4411                 if (ot == MO_64) {
4412                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4413                 } else {
4414                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4415                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4416                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4417                 }
4418                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4419                 break;
4420 
4421             default:
4422                 goto unknown_op;
4423             }
4424             break;
4425 
4426         default:
4427         unknown_op:
4428             gen_unknown_opcode(env, s);
4429             return;
4430         }
4431     } else {
4432         /* generic MMX or SSE operation */
4433         switch (b) {
4434         case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4435         case 0xc6: /* shufps/shufpd */
4436         case 0xc2: /* compare insns */
4437             s->rip_offset = 1;
4438             break;
4439         default:
4440             break;
4441         }
4442         if (is_xmm) {
4443             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4444             if (mod != 3) {
4445                 int sz = 4;
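                     /* Log2 of the memory operand size: 2 = 32-bit,
                        3 = 64-bit, 4 = full 128-bit access. */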
4446 
4447                 gen_lea_modrm(env, s, modrm);
4448                 op2_offset = offsetof(CPUX86State,xmm_t0);
4449 
4450                 switch (b) {
4451                 case 0x50 ... 0x5a:
4452                 case 0x5c ... 0x5f:
4453                 case 0xc2:
4454                     /* Most sse scalar operations.  */
4455                     if (b1 == 2) {
4456                         sz = 2;
4457                     } else if (b1 == 3) {
4458                         sz = 3;
4459                     }
4460                     break;
4461 
4462                 case 0x2e:  /* ucomis[sd] */
4463                 case 0x2f:  /* comis[sd] */
4464                     if (b1 == 0) {
4465                         sz = 2;
4466                     } else {
4467                         sz = 3;
4468                     }
4469                     break;
4470                 }
4471 
4472                 switch (sz) {
4473                 case 2:
4474                     /* 32 bit access */
4475                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4476                     tcg_gen_st32_tl(s->T0, cpu_env,
4477                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4478                     break;
4479                 case 3:
4480                     /* 64 bit access */
4481                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4482                     break;
4483                 default:
4484                     /* 128 bit access */
4485                     gen_ldo_env_A0(s, op2_offset);
4486                     break;
4487                 }
4488             } else {
4489                 rm = (modrm & 7) | REX_B(s);
4490                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4491             }
4492         } else {
4493             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4494             if (mod != 3) {
4495                 gen_lea_modrm(env, s, modrm);
4496                 op2_offset = offsetof(CPUX86State,mmx_t0);
4497                 gen_ldq_env_A0(s, op2_offset);
4498             } else {
4499                 rm = (modrm & 7);
4500                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4501             }
4502         }
4503         switch(b) {
4504         case 0x0f: /* 3DNow! data insns */
4505             val = x86_ldub_code(env, s);
4506             sse_fn_epp = sse_op_table5[val];
4507             if (!sse_fn_epp) {
4508                 goto unknown_op;
4509             }
4510             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4511                 goto illegal_op;
4512             }
4513             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4514             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4515             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4516             break;
4517         case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4518         case 0xc6: /* shufps/shufpd */
4519             val = x86_ldub_code(env, s);
4520             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4521             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4522             /* XXX: introduce a new table? */
4523             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4524             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4525             break;
4526         case 0xc2:
4527             /* compare insns */
4528             val = x86_ldub_code(env, s);
4529             if (val >= 8)
4530                 goto unknown_op;
4531             sse_fn_epp = sse_op_table4[val][b1];
4532 
4533             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4534             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4535             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4536             break;
4537         case 0xf7:
4538             /* maskmovq/maskmovdqu store to implicit DS:rDI, so prepare A0 by hand */
4539             if (mod != 3)
4540                 goto illegal_op;
4541             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4542             gen_extu(s->aflag, s->A0);
4543             gen_add_A0_ds_seg(s);
4544 
4545             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4546             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4547             /* XXX: introduce a new table? */
4548             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4549             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4550             break;
4551         default:
4552             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4553             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4554             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4555             break;
4556         }
4557         if (b == 0x2e || b == 0x2f) {
4558             set_cc_op(s, CC_OP_EFLAGS);
4559         }
4560     }
4561 }
4562 
4563 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4564    be stopped.  Returns the next pc value.  */
4565 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4566 {
4567     CPUX86State *env = cpu->env_ptr;
4568     int b, prefixes;
4569     int shift;
4570     MemOp ot, aflag, dflag;
4571     int modrm, reg, rm, mod, op, opreg, val;
4572     target_ulong next_eip, tval;
4573     target_ulong pc_start = s->base.pc_next;
4574 
4575     s->pc_start = s->pc = pc_start;
4576     s->override = -1;
4577 #ifdef TARGET_X86_64
4578     s->rex_w = false;
4579     s->rex_r = 0;
4580     s->rex_x = 0;
4581     s->rex_b = 0;
4582 #endif
4583     s->rip_offset = 0; /* for relative ip address */
4584     s->vex_l = 0;
4585     s->vex_v = 0;
4586     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4587         gen_exception_gpf(s);
4588         return s->pc;
4589     }
4590 
4591     prefixes = 0;
4592 
4593  next_byte:
4594     b = x86_ldub_code(env, s);
4595     /* Collect prefixes.  */
4596     switch (b) {
4597     case 0xf3:
4598         prefixes |= PREFIX_REPZ;
4599         goto next_byte;
4600     case 0xf2:
4601         prefixes |= PREFIX_REPNZ;
4602         goto next_byte;
4603     case 0xf0:
4604         prefixes |= PREFIX_LOCK;
4605         goto next_byte;
4606     case 0x2e:
4607         s->override = R_CS;
4608         goto next_byte;
4609     case 0x36:
4610         s->override = R_SS;
4611         goto next_byte;
4612     case 0x3e:
4613         s->override = R_DS;
4614         goto next_byte;
4615     case 0x26:
4616         s->override = R_ES;
4617         goto next_byte;
4618     case 0x64:
4619         s->override = R_FS;
4620         goto next_byte;
4621     case 0x65:
4622         s->override = R_GS;
4623         goto next_byte;
4624     case 0x66:
4625         prefixes |= PREFIX_DATA;
4626         goto next_byte;
4627     case 0x67:
4628         prefixes |= PREFIX_ADR;
4629         goto next_byte;
4630 #ifdef TARGET_X86_64
4631     case 0x40 ... 0x4f:
4632         if (CODE64(s)) {
4633             /* REX prefix */
4634             prefixes |= PREFIX_REX;
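                 /*
                  * rex_r/x/b are stored pre-shifted to bit 3 so they can be
                  * ORed straight into modrm reg/index/base numbers; e.g. REX
                  * byte 0x44 sets only rex_r, extending modrm.reg to r8..r15.
                  */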
4635             s->rex_w = (b >> 3) & 1;
4636             s->rex_r = (b & 0x4) << 1;
4637             s->rex_x = (b & 0x2) << 2;
4638             s->rex_b = (b & 0x1) << 3;
4639             goto next_byte;
4640         }
4641         break;
4642 #endif
4643     case 0xc5: /* 2-byte VEX */
4644     case 0xc4: /* 3-byte VEX */
4645         /* VEX prefixes are only valid in 32/64-bit protected mode;
4646            in 16-bit or VM86 mode the 0xc4/0xc5 bytes are LES or LDS.  */
4647         if (CODE32(s) && !VM86(s)) {
4648             static const int pp_prefix[4] = {
4649                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4650             };
4651             int vex3, vex2 = x86_ldub_code(env, s);
4652 
4653             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4654                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4655                    otherwise the instruction is LES or LDS.  */
4656                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4657                 break;
4658             }
4659 
4660             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4661             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4662                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4663                 goto illegal_op;
4664             }
4665 #ifdef TARGET_X86_64
4666             s->rex_r = (~vex2 >> 4) & 8;
4667 #endif
4668             if (b == 0xc5) {
4669                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4670                 vex3 = vex2;
4671                 b = x86_ldub_code(env, s) | 0x100;
4672             } else {
4673                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4674                 vex3 = x86_ldub_code(env, s);
4675 #ifdef TARGET_X86_64
4676                 s->rex_x = (~vex2 >> 3) & 8;
4677                 s->rex_b = (~vex2 >> 2) & 8;
4678                 s->rex_w = (vex3 >> 7) & 1;
4679 #endif
4680                 switch (vex2 & 0x1f) {
4681                 case 0x01: /* Implied 0f leading opcode bytes.  */
4682                     b = x86_ldub_code(env, s) | 0x100;
4683                     break;
4684                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4685                     b = 0x138;
4686                     break;
4687                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4688                     b = 0x13a;
4689                     break;
4690                 default:   /* Reserved for future use.  */
4691                     goto unknown_op;
4692                 }
4693             }
4694             s->vex_v = (~vex3 >> 3) & 0xf;
4695             s->vex_l = (vex3 >> 2) & 1;
4696             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4697         }
4698         break;
4699     }
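         /*
          * Illustrative decode: c4 e2 71 f7 c3 is a 3-byte VEX with
          * mmmmm = 2 (0f 38 map), pp = 1 (66), vvvv = 0xe -> ECX,
          * W = L = 0; opcode f7 and modrm c3 then give SHLX EAX, EBX, ECX.
          */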
4700 
4701     /* Post-process prefixes.  */
4702     if (CODE64(s)) {
4703         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4704            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4705            over 0x66 if both are present.  */
4706         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4707         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4708         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4709     } else {
4710         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4711         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4712             dflag = MO_32;
4713         } else {
4714             dflag = MO_16;
4715         }
4716         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4717         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4718             aflag = MO_32;
4719         }  else {
4720             aflag = MO_16;
4721         }
4722     }
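         /*
          * E.g. in 64-bit mode 66 48 89 c8 is mov rax, rcx: REX.W beats
          * the 0x66 prefix, while a 0x67 prefix would only shrink aflag.
          */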
4723 
4724     s->prefix = prefixes;
4725     s->aflag = aflag;
4726     s->dflag = dflag;
4727 
4728     /* now check op code */
4729  reswitch:
4730     switch(b) {
4731     case 0x0f:
4732         /**************************/
4733         /* extended op code */
4734         b = x86_ldub_code(env, s) | 0x100;
4735         goto reswitch;
4736 
4737         /**************************/
4738         /* arith & logic */
4739     case 0x00 ... 0x05:
4740     case 0x08 ... 0x0d:
4741     case 0x10 ... 0x15:
4742     case 0x18 ... 0x1d:
4743     case 0x20 ... 0x25:
4744     case 0x28 ... 0x2d:
4745     case 0x30 ... 0x35:
4746     case 0x38 ... 0x3d:
4747         {
4748             int op, f, val;
4749             op = (b >> 3) & 7;
4750             f = (b >> 1) & 3;
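                 /* E.g. b = 0x31 (xor Ev, Gv) gives op = 6 (OP_XORL), f = 0.  */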
4751 
4752             ot = mo_b_d(b, dflag);
4753 
4754             switch(f) {
4755             case 0: /* OP Ev, Gv */
4756                 modrm = x86_ldub_code(env, s);
4757                 reg = ((modrm >> 3) & 7) | REX_R(s);
4758                 mod = (modrm >> 6) & 3;
4759                 rm = (modrm & 7) | REX_B(s);
4760                 if (mod != 3) {
4761                     gen_lea_modrm(env, s, modrm);
4762                     opreg = OR_TMP0;
4763                 } else if (op == OP_XORL && rm == reg) {
4764                 xor_zero:
4765                     /* xor reg, reg is a common zeroing idiom: skip the ALU op */
4766                     set_cc_op(s, CC_OP_CLR);
4767                     tcg_gen_movi_tl(s->T0, 0);
4768                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4769                     break;
4770                 } else {
4771                     opreg = rm;
4772                 }
4773                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4774                 gen_op(s, op, ot, opreg);
4775                 break;
4776             case 1: /* OP Gv, Ev */
4777                 modrm = x86_ldub_code(env, s);
4778                 mod = (modrm >> 6) & 3;
4779                 reg = ((modrm >> 3) & 7) | REX_R(s);
4780                 rm = (modrm & 7) | REX_B(s);
4781                 if (mod != 3) {
4782                     gen_lea_modrm(env, s, modrm);
4783                     gen_op_ld_v(s, ot, s->T1, s->A0);
4784                 } else if (op == OP_XORL && rm == reg) {
4785                     goto xor_zero;
4786                 } else {
4787                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4788                 }
4789                 gen_op(s, op, ot, reg);
4790                 break;
4791             case 2: /* OP A, Iv */
4792                 val = insn_get(env, s, ot);
4793                 tcg_gen_movi_tl(s->T1, val);
4794                 gen_op(s, op, ot, OR_EAX);
4795                 break;
4796             }
4797         }
4798         break;
4799 
4800     case 0x82:
4801         if (CODE64(s))
4802             goto illegal_op;
4803         /* fall through */
4804     case 0x80: /* GRP1 */
4805     case 0x81:
4806     case 0x83:
4807         {
4808             int val;
4809 
4810             ot = mo_b_d(b, dflag);
4811 
4812             modrm = x86_ldub_code(env, s);
4813             mod = (modrm >> 6) & 3;
4814             rm = (modrm & 7) | REX_B(s);
4815             op = (modrm >> 3) & 7;
4816 
4817             if (mod != 3) {
4818                 if (b == 0x83)
4819                     s->rip_offset = 1;
4820                 else
4821                     s->rip_offset = insn_const_size(ot);
4822                 gen_lea_modrm(env, s, modrm);
4823                 opreg = OR_TMP0;
4824             } else {
4825                 opreg = rm;
4826             }
4827 
4828             switch(b) {
4829             default:
4830             case 0x80:
4831             case 0x81:
4832             case 0x82:
4833                 val = insn_get(env, s, ot);
4834                 break;
4835             case 0x83:
4836                 val = (int8_t)insn_get(env, s, MO_8);
4837                 break;
4838             }
4839             tcg_gen_movi_tl(s->T1, val);
4840             gen_op(s, op, ot, opreg);
4841         }
4842         break;
4843 
4844         /**************************/
4845         /* inc, dec, and other misc arith */
4846     case 0x40 ... 0x47: /* inc Gv */
4847         ot = dflag;
4848         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4849         break;
4850     case 0x48 ... 0x4f: /* dec Gv */
4851         ot = dflag;
4852         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4853         break;
4854     case 0xf6: /* GRP3 */
4855     case 0xf7:
4856         ot = mo_b_d(b, dflag);
4857 
4858         modrm = x86_ldub_code(env, s);
4859         mod = (modrm >> 6) & 3;
4860         rm = (modrm & 7) | REX_B(s);
4861         op = (modrm >> 3) & 7;
4862         if (mod != 3) {
4863             if (op == 0) {
4864                 s->rip_offset = insn_const_size(ot);
4865             }
4866             gen_lea_modrm(env, s, modrm);
4867             /* For those below that handle locked memory, don't load here.  */
4868             if (!(s->prefix & PREFIX_LOCK)
4869                 || op != 2) {
4870                 gen_op_ld_v(s, ot, s->T0, s->A0);
4871             }
4872         } else {
4873             gen_op_mov_v_reg(s, ot, s->T0, rm);
4874         }
4875 
4876         switch(op) {
4877         case 0: /* test */
4878             val = insn_get(env, s, ot);
4879             tcg_gen_movi_tl(s->T1, val);
4880             gen_op_testl_T0_T1_cc(s);
4881             set_cc_op(s, CC_OP_LOGICB + ot);
4882             break;
4883         case 2: /* not */
4884             if (s->prefix & PREFIX_LOCK) {
4885                 if (mod == 3) {
4886                     goto illegal_op;
4887                 }
4888                 tcg_gen_movi_tl(s->T0, ~0);
4889                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4890                                             s->mem_index, ot | MO_LE);
4891             } else {
4892                 tcg_gen_not_tl(s->T0, s->T0);
4893                 if (mod != 3) {
4894                     gen_op_st_v(s, ot, s->T0, s->A0);
4895                 } else {
4896                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4897                 }
4898             }
4899             break;
4900         case 3: /* neg */
4901             if (s->prefix & PREFIX_LOCK) {
4902                 TCGLabel *label1;
4903                 TCGv a0, t0, t1, t2;
4904 
4905                 if (mod == 3) {
4906                     goto illegal_op;
4907                 }
4908                 a0 = tcg_temp_local_new();
4909                 t0 = tcg_temp_local_new();
4910                 label1 = gen_new_label();
4911 
4912                 tcg_gen_mov_tl(a0, s->A0);
4913                 tcg_gen_mov_tl(t0, s->T0);
4914 
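                     /*
                      * LOCK NEG has no single atomic TCG op: loop on cmpxchg
                      * until the value it sees matches the one we negated.
                      */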
4915                 gen_set_label(label1);
4916                 t1 = tcg_temp_new();
4917                 t2 = tcg_temp_new();
4918                 tcg_gen_mov_tl(t2, t0);
4919                 tcg_gen_neg_tl(t1, t0);
4920                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4921                                           s->mem_index, ot | MO_LE);
4922                 tcg_temp_free(t1);
4923                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4924 
4925                 tcg_temp_free(t2);
4926                 tcg_temp_free(a0);
4927                 tcg_gen_mov_tl(s->T0, t0);
4928                 tcg_temp_free(t0);
4929             } else {
4930                 tcg_gen_neg_tl(s->T0, s->T0);
4931                 if (mod != 3) {
4932                     gen_op_st_v(s, ot, s->T0, s->A0);
4933                 } else {
4934                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4935                 }
4936             }
4937             gen_op_update_neg_cc(s);
4938             set_cc_op(s, CC_OP_SUBB + ot);
4939             break;
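             /*
              * For MUL, CC_SRC holds the high part of the product and
              * CF = OF = (CC_SRC != 0); for IMUL, CC_SRC is the high part
              * minus the sign extension of the low part, again non-zero
              * exactly when the product overflowed the destination.
              */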
4940         case 4: /* mul */
4941             switch(ot) {
4942             case MO_8:
4943                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4944                 tcg_gen_ext8u_tl(s->T0, s->T0);
4945                 tcg_gen_ext8u_tl(s->T1, s->T1);
4946                 /* XXX: use 32 bit mul which could be faster */
4947                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4948                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4949                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4950                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4951                 set_cc_op(s, CC_OP_MULB);
4952                 break;
4953             case MO_16:
4954                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4955                 tcg_gen_ext16u_tl(s->T0, s->T0);
4956                 tcg_gen_ext16u_tl(s->T1, s->T1);
4957                 /* XXX: use 32 bit mul which could be faster */
4958                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4959                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4960                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4961                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4962                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4963                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4964                 set_cc_op(s, CC_OP_MULW);
4965                 break;
4966             default:
4967             case MO_32:
4968                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4969                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4970                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4971                                   s->tmp2_i32, s->tmp3_i32);
4972                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4973                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4974                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4975                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4976                 set_cc_op(s, CC_OP_MULL);
4977                 break;
4978 #ifdef TARGET_X86_64
4979             case MO_64:
4980                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4981                                   s->T0, cpu_regs[R_EAX]);
4982                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4983                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4984                 set_cc_op(s, CC_OP_MULQ);
4985                 break;
4986 #endif
4987             }
4988             break;
4989         case 5: /* imul */
4990             switch(ot) {
4991             case MO_8:
4992                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4993                 tcg_gen_ext8s_tl(s->T0, s->T0);
4994                 tcg_gen_ext8s_tl(s->T1, s->T1);
4995                 /* XXX: use 32 bit mul which could be faster */
4996                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4997                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4998                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4999                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5000                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5001                 set_cc_op(s, CC_OP_MULB);
5002                 break;
5003             case MO_16:
5004                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5005                 tcg_gen_ext16s_tl(s->T0, s->T0);
5006                 tcg_gen_ext16s_tl(s->T1, s->T1);
5007                 /* XXX: use 32 bit mul which could be faster */
5008                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5009                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5010                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5011                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5012                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5013                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5014                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5015                 set_cc_op(s, CC_OP_MULW);
5016                 break;
5017             default:
5018             case MO_32:
5019                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5020                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5021                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5022                                   s->tmp2_i32, s->tmp3_i32);
5023                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5024                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5025                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5026                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5027                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5028                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5029                 set_cc_op(s, CC_OP_MULL);
5030                 break;
5031 #ifdef TARGET_X86_64
5032             case MO_64:
5033                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5034                                   s->T0, cpu_regs[R_EAX]);
5035                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5036                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5037                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5038                 set_cc_op(s, CC_OP_MULQ);
5039                 break;
5040 #endif
5041             }
5042             break;
5043         case 6: /* div */
5044             switch(ot) {
5045             case MO_8:
5046                 gen_helper_divb_AL(cpu_env, s->T0);
5047                 break;
5048             case MO_16:
5049                 gen_helper_divw_AX(cpu_env, s->T0);
5050                 break;
5051             default:
5052             case MO_32:
5053                 gen_helper_divl_EAX(cpu_env, s->T0);
5054                 break;
5055 #ifdef TARGET_X86_64
5056             case MO_64:
5057                 gen_helper_divq_EAX(cpu_env, s->T0);
5058                 break;
5059 #endif
5060             }
5061             break;
5062         case 7: /* idiv */
5063             switch(ot) {
5064             case MO_8:
5065                 gen_helper_idivb_AL(cpu_env, s->T0);
5066                 break;
5067             case MO_16:
5068                 gen_helper_idivw_AX(cpu_env, s->T0);
5069                 break;
5070             default:
5071             case MO_32:
5072                 gen_helper_idivl_EAX(cpu_env, s->T0);
5073                 break;
5074 #ifdef TARGET_X86_64
5075             case MO_64:
5076                 gen_helper_idivq_EAX(cpu_env, s->T0);
5077                 break;
5078 #endif
5079             }
5080             break;
5081         default:
5082             goto unknown_op;
5083         }
5084         break;
5085 
5086     case 0xfe: /* GRP4 */
5087     case 0xff: /* GRP5 */
5088         ot = mo_b_d(b, dflag);
5089 
5090         modrm = x86_ldub_code(env, s);
5091         mod = (modrm >> 6) & 3;
5092         rm = (modrm & 7) | REX_B(s);
5093         op = (modrm >> 3) & 7;
5094         if (op >= 2 && b == 0xfe) {
5095             goto unknown_op;
5096         }
5097         if (CODE64(s)) {
5098             if (op == 2 || op == 4) {
5099                 /* operand size for jumps is 64 bit */
5100                 ot = MO_64;
5101             } else if (op == 3 || op == 5) {
5102                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5103             } else if (op == 6) {
5104                 /* default push size is 64 bit */
5105                 ot = mo_pushpop(s, dflag);
5106             }
5107         }
5108         if (mod != 3) {
5109             gen_lea_modrm(env, s, modrm);
5110             if (op >= 2 && op != 3 && op != 5)
5111                 gen_op_ld_v(s, ot, s->T0, s->A0);
5112         } else {
5113             gen_op_mov_v_reg(s, ot, s->T0, rm);
5114         }
5115 
5116         switch(op) {
5117         case 0: /* inc Ev */
5118             if (mod != 3)
5119                 opreg = OR_TMP0;
5120             else
5121                 opreg = rm;
5122             gen_inc(s, ot, opreg, 1);
5123             break;
5124         case 1: /* dec Ev */
5125             if (mod != 3)
5126                 opreg = OR_TMP0;
5127             else
5128                 opreg = rm;
5129             gen_inc(s, ot, opreg, -1);
5130             break;
5131         case 2: /* call Ev */
5132             /* XXX: optimize the memory case (the zero-extension is unnecessary) */
5133             if (dflag == MO_16) {
5134                 tcg_gen_ext16u_tl(s->T0, s->T0);
5135             }
5136             next_eip = s->pc - s->cs_base;
5137             tcg_gen_movi_tl(s->T1, next_eip);
5138             gen_push_v(s, s->T1);
5139             gen_op_jmp_v(s->T0);
5140             gen_bnd_jmp(s);
5141             gen_jr(s, s->T0);
5142             break;
5143         case 3: /* lcall Ev */
5144             if (mod == 3) {
5145                 goto illegal_op;
5146             }
5147             gen_op_ld_v(s, ot, s->T1, s->A0);
5148             gen_add_A0_im(s, 1 << ot);
5149             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5150         do_lcall:
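                 /*
                  * T0 is the new CS selector, T1 the new offset.  The
                  * protected-mode helper performs the full far-call checks;
                  * the real/VM86 helper just pushes CS:IP and reloads CS
                  * real-mode style.
                  */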
5151             if (PE(s) && !VM86(s)) {
5152                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5153                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5154                                            tcg_const_i32(dflag - 1),
5155                                            tcg_const_tl(s->pc - s->cs_base));
5156             } else {
5157                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5158                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5159                                       tcg_const_i32(dflag - 1),
5160                                       tcg_const_i32(s->pc - s->cs_base));
5161             }
5162             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5163             gen_jr(s, s->tmp4);
5164             break;
5165         case 4: /* jmp Ev */
5166             if (dflag == MO_16) {
5167                 tcg_gen_ext16u_tl(s->T0, s->T0);
5168             }
5169             gen_op_jmp_v(s->T0);
5170             gen_bnd_jmp(s);
5171             gen_jr(s, s->T0);
5172             break;
5173         case 5: /* ljmp Ev */
5174             if (mod == 3) {
5175                 goto illegal_op;
5176             }
5177             gen_op_ld_v(s, ot, s->T1, s->A0);
5178             gen_add_A0_im(s, 1 << ot);
5179             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5180         do_ljmp:
5181             if (PE(s) && !VM86(s)) {
5182                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5183                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5184                                           tcg_const_tl(s->pc - s->cs_base));
5185             } else {
5186                 gen_op_movl_seg_T0_vm(s, R_CS);
5187                 gen_op_jmp_v(s->T1);
5188             }
5189             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5190             gen_jr(s, s->tmp4);
5191             break;
5192         case 6: /* push Ev */
5193             gen_push_v(s, s->T0);
5194             break;
5195         default:
5196             goto unknown_op;
5197         }
5198         break;
5199 
5200     case 0x84: /* test Ev, Gv */
5201     case 0x85:
5202         ot = mo_b_d(b, dflag);
5203 
5204         modrm = x86_ldub_code(env, s);
5205         reg = ((modrm >> 3) & 7) | REX_R(s);
5206 
5207         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5208         gen_op_mov_v_reg(s, ot, s->T1, reg);
5209         gen_op_testl_T0_T1_cc(s);
5210         set_cc_op(s, CC_OP_LOGICB + ot);
5211         break;
5212 
5213     case 0xa8: /* test eAX, Iv */
5214     case 0xa9:
5215         ot = mo_b_d(b, dflag);
5216         val = insn_get(env, s, ot);
5217 
5218         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5219         tcg_gen_movi_tl(s->T1, val);
5220         gen_op_testl_T0_T1_cc(s);
5221         set_cc_op(s, CC_OP_LOGICB + ot);
5222         break;
5223 
5224     case 0x98: /* CWDE/CBW */
5225         switch (dflag) {
5226 #ifdef TARGET_X86_64
5227         case MO_64:
5228             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5229             tcg_gen_ext32s_tl(s->T0, s->T0);
5230             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5231             break;
5232 #endif
5233         case MO_32:
5234             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5235             tcg_gen_ext16s_tl(s->T0, s->T0);
5236             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5237             break;
5238         case MO_16:
5239             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5240             tcg_gen_ext8s_tl(s->T0, s->T0);
5241             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5242             break;
5243         default:
5244             tcg_abort();
5245         }
5246         break;
5247     case 0x99: /* CDQ/CWD */
5248         switch (dflag) {
5249 #ifdef TARGET_X86_64
5250         case MO_64:
5251             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5252             tcg_gen_sari_tl(s->T0, s->T0, 63);
5253             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5254             break;
5255 #endif
5256         case MO_32:
5257             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5258             tcg_gen_ext32s_tl(s->T0, s->T0);
5259             tcg_gen_sari_tl(s->T0, s->T0, 31);
5260             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5261             break;
5262         case MO_16:
5263             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5264             tcg_gen_ext16s_tl(s->T0, s->T0);
5265             tcg_gen_sari_tl(s->T0, s->T0, 15);
5266             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5267             break;
5268         default:
5269             tcg_abort();
5270         }
5271         break;
5272     case 0x1af: /* imul Gv, Ev */
5273     case 0x69: /* imul Gv, Ev, I */
5274     case 0x6b:
5275         ot = dflag;
5276         modrm = x86_ldub_code(env, s);
5277         reg = ((modrm >> 3) & 7) | REX_R(s);
5278         if (b == 0x69)
5279             s->rip_offset = insn_const_size(ot);
5280         else if (b == 0x6b)
5281             s->rip_offset = 1;
5282         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5283         if (b == 0x69) {
5284             val = insn_get(env, s, ot);
5285             tcg_gen_movi_tl(s->T1, val);
5286         } else if (b == 0x6b) {
5287             val = (int8_t)insn_get(env, s, MO_8);
5288             tcg_gen_movi_tl(s->T1, val);
5289         } else {
5290             gen_op_mov_v_reg(s, ot, s->T1, reg);
5291         }
5292         switch (ot) {
5293 #ifdef TARGET_X86_64
5294         case MO_64:
5295             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5296             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5297             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5298             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5299             break;
5300 #endif
5301         case MO_32:
5302             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5303             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5304             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5305                               s->tmp2_i32, s->tmp3_i32);
5306             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5307             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5308             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5309             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5310             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5311             break;
5312         default:
5313             tcg_gen_ext16s_tl(s->T0, s->T0);
5314             tcg_gen_ext16s_tl(s->T1, s->T1);
5315             /* XXX: use 32 bit mul which could be faster */
5316             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5317             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5318             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5319             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5320             gen_op_mov_reg_v(s, ot, reg, s->T0);
5321             break;
5322         }
5323         set_cc_op(s, CC_OP_MULB + ot);
5324         break;
5325     case 0x1c0:
5326     case 0x1c1: /* xadd Ev, Gv */
5327         ot = mo_b_d(b, dflag);
5328         modrm = x86_ldub_code(env, s);
5329         reg = ((modrm >> 3) & 7) | REX_R(s);
5330         mod = (modrm >> 6) & 3;
5331         gen_op_mov_v_reg(s, ot, s->T0, reg);
5332         if (mod == 3) {
5333             rm = (modrm & 7) | REX_B(s);
5334             gen_op_mov_v_reg(s, ot, s->T1, rm);
5335             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5336             gen_op_mov_reg_v(s, ot, reg, s->T1);
5337             gen_op_mov_reg_v(s, ot, rm, s->T0);
5338         } else {
5339             gen_lea_modrm(env, s, modrm);
5340             if (s->prefix & PREFIX_LOCK) {
5341                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5342                                             s->mem_index, ot | MO_LE);
5343                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5344             } else {
5345                 gen_op_ld_v(s, ot, s->T1, s->A0);
5346                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5347                 gen_op_st_v(s, ot, s->T0, s->A0);
5348             }
5349             gen_op_mov_reg_v(s, ot, reg, s->T1);
5350         }
5351         gen_op_update2_cc(s);
5352         set_cc_op(s, CC_OP_ADDB + ot);
5353         break;
5354     case 0x1b0:
5355     case 0x1b1: /* cmpxchg Ev, Gv */
5356         {
5357             TCGv oldv, newv, cmpv;
5358 
5359             ot = mo_b_d(b, dflag);
5360             modrm = x86_ldub_code(env, s);
5361             reg = ((modrm >> 3) & 7) | REX_R(s);
5362             mod = (modrm >> 6) & 3;
5363             oldv = tcg_temp_new();
5364             newv = tcg_temp_new();
5365             cmpv = tcg_temp_new();
5366             gen_op_mov_v_reg(s, ot, newv, reg);
5367             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5368 
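                 /*
                  * CMPXCHG: if the accumulator equals the destination, the
                  * destination is replaced by the source, else the accumulator
                  * is loaded from the destination; flags are set as for
                  * CMP accumulator, destination.
                  */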
5369             if (s->prefix & PREFIX_LOCK) {
5370                 if (mod == 3) {
5371                     goto illegal_op;
5372                 }
5373                 gen_lea_modrm(env, s, modrm);
5374                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5375                                           s->mem_index, ot | MO_LE);
5376                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5377             } else {
5378                 if (mod == 3) {
5379                     rm = (modrm & 7) | REX_B(s);
5380                     gen_op_mov_v_reg(s, ot, oldv, rm);
5381                 } else {
5382                     gen_lea_modrm(env, s, modrm);
5383                     gen_op_ld_v(s, ot, oldv, s->A0);
5384                     rm = 0; /* avoid warning */
5385                 }
5386                 gen_extu(ot, oldv);
5387                 gen_extu(ot, cmpv);
5388                 /* store value = (old == cmp ? new : old);  */
5389                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5390                 if (mod == 3) {
5391                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5392                     gen_op_mov_reg_v(s, ot, rm, newv);
5393                 } else {
5394                     /* Perform an unconditional store cycle like the
5395                        physical CPU; the store must come before the
5396                        accumulator update so the insn stays idempotent
5397                        if the store faults and is restarted */
5398                     gen_op_st_v(s, ot, newv, s->A0);
5399                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5400                 }
5401             }
5402             tcg_gen_mov_tl(cpu_cc_src, oldv);
5403             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5404             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5405             set_cc_op(s, CC_OP_SUBB + ot);
5406             tcg_temp_free(oldv);
5407             tcg_temp_free(newv);
5408             tcg_temp_free(cmpv);
5409         }
5410         break;
5411     case 0x1c7: /* cmpxchg8b */
5412         modrm = x86_ldub_code(env, s);
5413         mod = (modrm >> 6) & 3;
5414         switch ((modrm >> 3) & 7) {
5415         case 1: /* CMPXCHG8, CMPXCHG16 */
5416             if (mod == 3) {
5417                 goto illegal_op;
5418             }
5419 #ifdef TARGET_X86_64
5420             if (dflag == MO_64) {
5421                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5422                     goto illegal_op;
5423                 }
5424                 gen_lea_modrm(env, s, modrm);
5425                 if ((s->prefix & PREFIX_LOCK) &&
5426                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5427                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5428                 } else {
5429                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5430                 }
5431                 set_cc_op(s, CC_OP_EFLAGS);
5432                 break;
5433             }
5434 #endif
5435             if (!(s->cpuid_features & CPUID_CX8)) {
5436                 goto illegal_op;
5437             }
5438             gen_lea_modrm(env, s, modrm);
5439             if ((s->prefix & PREFIX_LOCK) &&
5440                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5441                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5442             } else {
5443                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5444             }
5445             set_cc_op(s, CC_OP_EFLAGS);
5446             break;
5447 
5448         case 7: /* RDSEED */
5449         case 6: /* RDRAND */
5450             if (mod != 3 ||
5451                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5452                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5453                 goto illegal_op;
5454             }
5455             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5456                 gen_io_start();
5457             }
5458             gen_helper_rdrand(s->T0, cpu_env);
5459             rm = (modrm & 7) | REX_B(s);
5460             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5461             set_cc_op(s, CC_OP_EFLAGS);
5462             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5463                 gen_jmp(s, s->pc - s->cs_base);
5464             }
5465             break;
5466 
5467         default:
5468             goto illegal_op;
5469         }
5470         break;
5471 
5472         /**************************/
5473         /* push/pop */
5474     case 0x50 ... 0x57: /* push */
5475         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5476         gen_push_v(s, s->T0);
5477         break;
5478     case 0x58 ... 0x5f: /* pop */
5479         ot = gen_pop_T0(s);
5480         /* NOTE: order is important for pop %sp */
5481         gen_pop_update(s, ot);
5482         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5483         break;
5484     case 0x60: /* pusha */
5485         if (CODE64(s))
5486             goto illegal_op;
5487         gen_pusha(s);
5488         break;
5489     case 0x61: /* popa */
5490         if (CODE64(s))
5491             goto illegal_op;
5492         gen_popa(s);
5493         break;
5494     case 0x68: /* push Iv */
5495     case 0x6a:
5496         ot = mo_pushpop(s, dflag);
5497         if (b == 0x68)
5498             val = insn_get(env, s, ot);
5499         else
5500             val = (int8_t)insn_get(env, s, MO_8);
5501         tcg_gen_movi_tl(s->T0, val);
5502         gen_push_v(s, s->T0);
5503         break;
5504     case 0x8f: /* pop Ev */
5505         modrm = x86_ldub_code(env, s);
5506         mod = (modrm >> 6) & 3;
5507         ot = gen_pop_T0(s);
5508         if (mod == 3) {
5509             /* NOTE: order is important for pop %sp */
5510             gen_pop_update(s, ot);
5511             rm = (modrm & 7) | REX_B(s);
5512             gen_op_mov_reg_v(s, ot, rm, s->T0);
5513         } else {
5514             /* NOTE: order is important too for MMU exceptions */
5515             s->popl_esp_hack = 1 << ot;
5516             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5517             s->popl_esp_hack = 0;
5518             gen_pop_update(s, ot);
5519         }
5520         break;
5521     case 0xc8: /* enter */
5522         {
5523             int level;
5524             val = x86_lduw_code(env, s);
5525             level = x86_ldub_code(env, s);
5526             gen_enter(s, val, level);
5527         }
5528         break;
5529     case 0xc9: /* leave */
5530         gen_leave(s);
5531         break;
5532     case 0x06: /* push es */
5533     case 0x0e: /* push cs */
5534     case 0x16: /* push ss */
5535     case 0x1e: /* push ds */
5536         if (CODE64(s))
5537             goto illegal_op;
5538         gen_op_movl_T0_seg(s, b >> 3);
5539         gen_push_v(s, s->T0);
5540         break;
5541     case 0x1a0: /* push fs */
5542     case 0x1a8: /* push gs */
5543         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5544         gen_push_v(s, s->T0);
5545         break;
5546     case 0x07: /* pop es */
5547     case 0x17: /* pop ss */
5548     case 0x1f: /* pop ds */
5549         if (CODE64(s))
5550             goto illegal_op;
5551         reg = b >> 3;
5552         ot = gen_pop_T0(s);
5553         gen_movl_seg_T0(s, reg);
5554         gen_pop_update(s, ot);
5555         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5556         if (s->base.is_jmp) {
5557             gen_jmp_im(s, s->pc - s->cs_base);
5558             if (reg == R_SS) {
5559                 s->flags &= ~HF_TF_MASK;
5560                 gen_eob_inhibit_irq(s, true);
5561             } else {
5562                 gen_eob(s);
5563             }
5564         }
5565         break;
5566     case 0x1a1: /* pop fs */
5567     case 0x1a9: /* pop gs */
5568         ot = gen_pop_T0(s);
5569         gen_movl_seg_T0(s, (b >> 3) & 7);
5570         gen_pop_update(s, ot);
5571         if (s->base.is_jmp) {
5572             gen_jmp_im(s, s->pc - s->cs_base);
5573             gen_eob(s);
5574         }
5575         break;
5576 
5577         /**************************/
5578         /* mov */
5579     case 0x88:
5580     case 0x89: /* mov Gv, Ev */
5581         ot = mo_b_d(b, dflag);
5582         modrm = x86_ldub_code(env, s);
5583         reg = ((modrm >> 3) & 7) | REX_R(s);
5584 
5585         /* generate a generic store */
5586         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5587         break;
5588     case 0xc6:
5589     case 0xc7: /* mov Ev, Iv */
5590         ot = mo_b_d(b, dflag);
5591         modrm = x86_ldub_code(env, s);
5592         mod = (modrm >> 6) & 3;
5593         if (mod != 3) {
5594             s->rip_offset = insn_const_size(ot);
5595             gen_lea_modrm(env, s, modrm);
5596         }
5597         val = insn_get(env, s, ot);
5598         tcg_gen_movi_tl(s->T0, val);
5599         if (mod != 3) {
5600             gen_op_st_v(s, ot, s->T0, s->A0);
5601         } else {
5602             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5603         }
5604         break;
5605     case 0x8a:
5606     case 0x8b: /* mov Ev, Gv */
5607         ot = mo_b_d(b, dflag);
5608         modrm = x86_ldub_code(env, s);
5609         reg = ((modrm >> 3) & 7) | REX_R(s);
5610 
5611         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5612         gen_op_mov_reg_v(s, ot, reg, s->T0);
5613         break;
5614     case 0x8e: /* mov seg, Gv */
5615         modrm = x86_ldub_code(env, s);
5616         reg = (modrm >> 3) & 7;
5617         if (reg >= 6 || reg == R_CS)
5618             goto illegal_op;
5619         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5620         gen_movl_seg_T0(s, reg);
5621         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5622         if (s->base.is_jmp) {
5623             gen_jmp_im(s, s->pc - s->cs_base);
5624             if (reg == R_SS) {
5625                 s->flags &= ~HF_TF_MASK;
5626                 gen_eob_inhibit_irq(s, true);
5627             } else {
5628                 gen_eob(s);
5629             }
5630         }
5631         break;
5632     case 0x8c: /* mov Gv, seg */
5633         modrm = x86_ldub_code(env, s);
5634         reg = (modrm >> 3) & 7;
5635         mod = (modrm >> 6) & 3;
5636         if (reg >= 6)
5637             goto illegal_op;
5638         gen_op_movl_T0_seg(s, reg);
5639         ot = mod == 3 ? dflag : MO_16;
5640         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5641         break;
5642 
5643     case 0x1b6: /* movzbS Gv, Eb */
5644     case 0x1b7: /* movzwS Gv, Eb */
5645     case 0x1be: /* movsbS Gv, Eb */
5646     case 0x1bf: /* movswS Gv, Eb */
5647         {
5648             MemOp d_ot;
5649             MemOp s_ot;
5650 
5651             /* d_ot is the size of destination */
5652             d_ot = dflag;
5653             /* ot is the size of source */
5654             ot = (b & 1) + MO_8;
5655             /* s_ot is the sign+size of source */
5656             s_ot = b & 8 ? MO_SIGN | ot : ot;
5657 
5658             modrm = x86_ldub_code(env, s);
5659             reg = ((modrm >> 3) & 7) | REX_R(s);
5660             mod = (modrm >> 6) & 3;
5661             rm = (modrm & 7) | REX_B(s);
5662 
5663             if (mod == 3) {
5664                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5665                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5666                 } else {
5667                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5668                     switch (s_ot) {
5669                     case MO_UB:
5670                         tcg_gen_ext8u_tl(s->T0, s->T0);
5671                         break;
5672                     case MO_SB:
5673                         tcg_gen_ext8s_tl(s->T0, s->T0);
5674                         break;
5675                     case MO_UW:
5676                         tcg_gen_ext16u_tl(s->T0, s->T0);
5677                         break;
5678                     default:
5679                     case MO_SW:
5680                         tcg_gen_ext16s_tl(s->T0, s->T0);
5681                         break;
5682                     }
5683                 }
5684                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5685             } else {
5686                 gen_lea_modrm(env, s, modrm);
5687                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5688                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5689             }
5690         }
5691         break;
5692 
5693     case 0x8d: /* lea */
5694         modrm = x86_ldub_code(env, s);
5695         mod = (modrm >> 6) & 3;
5696         if (mod == 3)
5697             goto illegal_op;
5698         reg = ((modrm >> 3) & 7) | REX_R(s);
5699         {
5700             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5701             TCGv ea = gen_lea_modrm_1(s, a);
5702             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5703             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5704         }
5705         break;
5706 
5707     case 0xa0: /* mov EAX, Ov */
5708     case 0xa1:
5709     case 0xa2: /* mov Ov, EAX */
5710     case 0xa3:
5711         {
5712             target_ulong offset_addr;
5713 
5714             ot = mo_b_d(b, dflag);
5715             switch (s->aflag) {
5716 #ifdef TARGET_X86_64
5717             case MO_64:
5718                 offset_addr = x86_ldq_code(env, s);
5719                 break;
5720 #endif
5721             default:
5722                 offset_addr = insn_get(env, s, s->aflag);
5723                 break;
5724             }
5725             tcg_gen_movi_tl(s->A0, offset_addr);
5726             gen_add_A0_ds_seg(s);
5727             if ((b & 2) == 0) {
5728                 gen_op_ld_v(s, ot, s->T0, s->A0);
5729                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5730             } else {
5731                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5732                 gen_op_st_v(s, ot, s->T0, s->A0);
5733             }
5734         }
5735         break;
5736     case 0xd7: /* xlat */
5737         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5738         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5739         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5740         gen_extu(s->aflag, s->A0);
5741         gen_add_A0_ds_seg(s);
5742         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5743         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5744         break;
5745     case 0xb0 ... 0xb7: /* mov R, Ib */
5746         val = insn_get(env, s, MO_8);
5747         tcg_gen_movi_tl(s->T0, val);
5748         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5749         break;
5750     case 0xb8 ... 0xbf: /* mov R, Iv */
5751 #ifdef TARGET_X86_64
5752         if (dflag == MO_64) {
5753             uint64_t tmp;
5754             /* 64 bit case */
5755             tmp = x86_ldq_code(env, s);
5756             reg = (b & 7) | REX_B(s);
5757             tcg_gen_movi_tl(s->T0, tmp);
5758             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5759         } else
5760 #endif
5761         {
5762             ot = dflag;
5763             val = insn_get(env, s, ot);
5764             reg = (b & 7) | REX_B(s);
5765             tcg_gen_movi_tl(s->T0, val);
5766             gen_op_mov_reg_v(s, ot, reg, s->T0);
5767         }
5768         break;
5769 
5770     case 0x91 ... 0x97: /* xchg R, EAX */
5771     do_xchg_reg_eax:
5772         ot = dflag;
5773         reg = (b & 7) | REX_B(s);
5774         rm = R_EAX;
5775         goto do_xchg_reg;
5776     case 0x86:
5777     case 0x87: /* xchg Ev, Gv */
5778         ot = mo_b_d(b, dflag);
5779         modrm = x86_ldub_code(env, s);
5780         reg = ((modrm >> 3) & 7) | REX_R(s);
5781         mod = (modrm >> 6) & 3;
5782         if (mod == 3) {
5783             rm = (modrm & 7) | REX_B(s);
5784         do_xchg_reg:
5785             gen_op_mov_v_reg(s, ot, s->T0, reg);
5786             gen_op_mov_v_reg(s, ot, s->T1, rm);
5787             gen_op_mov_reg_v(s, ot, rm, s->T0);
5788             gen_op_mov_reg_v(s, ot, reg, s->T1);
5789         } else {
5790             gen_lea_modrm(env, s, modrm);
5791             gen_op_mov_v_reg(s, ot, s->T0, reg);
5792             /* for xchg, lock is implicit */
5793             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5794                                    s->mem_index, ot | MO_LE);
5795             gen_op_mov_reg_v(s, ot, reg, s->T1);
5796         }
5797         break;
5798     case 0xc4: /* les Gv */
5799         /* In CODE64 this is VEX3; see above.  */
5800         op = R_ES;
5801         goto do_lxx;
5802     case 0xc5: /* lds Gv */
5803         /* In CODE64 this is VEX2; see above.  */
5804         op = R_DS;
5805         goto do_lxx;
5806     case 0x1b2: /* lss Gv */
5807         op = R_SS;
5808         goto do_lxx;
5809     case 0x1b4: /* lfs Gv */
5810         op = R_FS;
5811         goto do_lxx;
5812     case 0x1b5: /* lgs Gv */
5813         op = R_GS;
5814     do_lxx:
5815         ot = dflag != MO_16 ? MO_32 : MO_16;
5816         modrm = x86_ldub_code(env, s);
5817         reg = ((modrm >> 3) & 7) | REX_R(s);
5818         mod = (modrm >> 6) & 3;
5819         if (mod == 3)
5820             goto illegal_op;
5821         gen_lea_modrm(env, s, modrm);
5822         gen_op_ld_v(s, ot, s->T1, s->A0);
5823         gen_add_A0_im(s, 1 << ot);
5824         /* load the segment first to handle exceptions properly */
5825         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5826         gen_movl_seg_T0(s, op);
5827         /* then put the data */
5828         gen_op_mov_reg_v(s, ot, reg, s->T1);
5829         if (s->base.is_jmp) {
5830             gen_jmp_im(s, s->pc - s->cs_base);
5831             gen_eob(s);
5832         }
5833         break;
5834 
5835         /************************/
5836         /* shifts */
5837     case 0xc0:
5838     case 0xc1:
5839         /* shift Ev,Ib */
5840         shift = 2;
5841     grp2:
5842         {
5843             ot = mo_b_d(b, dflag);
5844             modrm = x86_ldub_code(env, s);
5845             mod = (modrm >> 6) & 3;
5846             op = (modrm >> 3) & 7;
5847 
5848             if (mod != 3) {
5849                 if (shift == 2) {
5850                     s->rip_offset = 1;
5851                 }
5852                 gen_lea_modrm(env, s, modrm);
5853                 opreg = OR_TMP0;
5854             } else {
5855                 opreg = (modrm & 7) | REX_B(s);
5856             }
5857 
5858             /* count operand: CL if shift == 0, otherwise 1 or an imm8 read below */
5859             if (shift == 0) {
5860                 gen_shift(s, op, ot, opreg, OR_ECX);
5861             } else {
5862                 if (shift == 2) {
5863                     shift = x86_ldub_code(env, s);
5864                 }
5865                 gen_shifti(s, op, ot, opreg, shift);
5866             }
5867         }
5868         break;
5869     case 0xd0:
5870     case 0xd1:
5871         /* shift Ev,1 */
5872         shift = 1;
5873         goto grp2;
5874     case 0xd2:
5875     case 0xd3:
5876         /* shift Ev,cl */
5877         shift = 0;
5878         goto grp2;
5879 
5880     case 0x1a4: /* shld imm */
5881         op = 0;
5882         shift = 1;
5883         goto do_shiftd;
5884     case 0x1a5: /* shld cl */
5885         op = 0;
5886         shift = 0;
5887         goto do_shiftd;
5888     case 0x1ac: /* shrd imm */
5889         op = 1;
5890         shift = 1;
5891         goto do_shiftd;
5892     case 0x1ad: /* shrd cl */
5893         op = 1;
5894         shift = 0;
5895     do_shiftd:
5896         ot = dflag;
5897         modrm = x86_ldub_code(env, s);
5898         mod = (modrm >> 6) & 3;
5899         rm = (modrm & 7) | REX_B(s);
5900         reg = ((modrm >> 3) & 7) | REX_R(s);
5901         if (mod != 3) {
5902             gen_lea_modrm(env, s, modrm);
5903             opreg = OR_TMP0;
5904         } else {
5905             opreg = rm;
5906         }
5907         gen_op_mov_v_reg(s, ot, s->T1, reg);
5908 
5909         if (shift) {
5910             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5911             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5912             tcg_temp_free(imm);
5913         } else {
5914             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5915         }
5916         break;
5917 
5918         /************************/
5919         /* floats */
5920     case 0xd8 ... 0xdf:
5921         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5922             /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5923             /* XXX: what to do on an illegal op? */
5924             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5925             break;
5926         }
5927         modrm = x86_ldub_code(env, s);
5928         mod = (modrm >> 6) & 3;
5929         rm = modrm & 7;
5930         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
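             /* op packs the low opcode bits with modrm.reg: e.g. d9 /0
                (flds m32fp) yields op = 0x08 below.  */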
5931         if (mod != 3) {
5932             /* memory op */
5933             gen_lea_modrm(env, s, modrm);
5934             switch(op) {
5935             case 0x00 ... 0x07: /* fxxxs */
5936             case 0x10 ... 0x17: /* fixxxl */
5937             case 0x20 ... 0x27: /* fxxxl */
5938             case 0x30 ... 0x37: /* fixxx */
5939                 {
5940                     int op1;
5941                     op1 = op & 7;
5942 
5943                     switch(op >> 4) {
5944                     case 0:
5945                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5946                                             s->mem_index, MO_LEUL);
5947                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5948                         break;
5949                     case 1:
5950                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5951                                             s->mem_index, MO_LEUL);
5952                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5953                         break;
5954                     case 2:
5955                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5956                                             s->mem_index, MO_LEQ);
5957                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5958                         break;
5959                     case 3:
5960                     default:
5961                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5962                                             s->mem_index, MO_LESW);
5963                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5964                         break;
5965                     }
5966 
5967                     gen_helper_fp_arith_ST0_FT0(op1);
5968                     if (op1 == 3) {
5969                         /* fcomp needs pop */
5970                         gen_helper_fpop(cpu_env);
5971                     }
5972                 }
5973                 break;
5974             case 0x08: /* flds */
5975             case 0x0a: /* fsts */
5976             case 0x0b: /* fstps */
5977             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5978             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5979             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5980                 switch(op & 7) {
5981                 case 0:
5982                     switch(op >> 4) {
5983                     case 0:
5984                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5985                                             s->mem_index, MO_LEUL);
5986                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5987                         break;
5988                     case 1:
5989                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5990                                             s->mem_index, MO_LEUL);
5991                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5992                         break;
5993                     case 2:
5994                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5995                                             s->mem_index, MO_LEQ);
5996                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5997                         break;
5998                     case 3:
5999                     default:
6000                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6001                                             s->mem_index, MO_LESW);
6002                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6003                         break;
6004                     }
6005                     break;
6006                 case 1:
6007                     /* XXX: the corresponding CPUID bit (SSE3, for FISTTP) must be tested! */
6008                     switch(op >> 4) {
6009                     case 1:
6010                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6011                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6012                                             s->mem_index, MO_LEUL);
6013                         break;
6014                     case 2:
6015                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6016                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6017                                             s->mem_index, MO_LEQ);
6018                         break;
6019                     case 3:
6020                     default:
6021                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6022                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6023                                             s->mem_index, MO_LEUW);
6024                         break;
6025                     }
6026                     gen_helper_fpop(cpu_env);
6027                     break;
6028                 default:
6029                     switch(op >> 4) {
6030                     case 0:
6031                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6032                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6033                                             s->mem_index, MO_LEUL);
6034                         break;
6035                     case 1:
6036                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6037                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6038                                             s->mem_index, MO_LEUL);
6039                         break;
6040                     case 2:
6041                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6042                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6043                                             s->mem_index, MO_LEQ);
6044                         break;
6045                     case 3:
6046                     default:
6047                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6048                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6049                                             s->mem_index, MO_LEUW);
6050                         break;
6051                     }
6052                     if ((op & 7) == 3)
6053                         gen_helper_fpop(cpu_env);
6054                     break;
6055                 }
6056                 break;
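             /* for the env/state ops below, dflag - 1 selects the 16- vs
                32-bit memory layout */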
6057             case 0x0c: /* fldenv mem */
6058                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6059                 break;
6060             case 0x0d: /* fldcw mem */
6061                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6062                                     s->mem_index, MO_LEUW);
6063                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
6064                 break;
6065             case 0x0e: /* fnstenv mem */
6066                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6067                 break;
6068             case 0x0f: /* fnstcw mem */
6069                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6070                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6071                                     s->mem_index, MO_LEUW);
6072                 break;
6073             case 0x1d: /* fldt mem */
6074                 gen_helper_fldt_ST0(cpu_env, s->A0);
6075                 break;
6076             case 0x1f: /* fstpt mem */
6077                 gen_helper_fstt_ST0(cpu_env, s->A0);
6078                 gen_helper_fpop(cpu_env);
6079                 break;
6080             case 0x2c: /* frstor mem */
6081                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6082                 break;
6083             case 0x2e: /* fnsave mem */
6084                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6085                 break;
6086             case 0x2f: /* fnstsw mem */
6087                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6088                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6089                                     s->mem_index, MO_LEUW);
6090                 break;
6091             case 0x3c: /* fbld */
6092                 gen_helper_fbld_ST0(cpu_env, s->A0);
6093                 break;
6094             case 0x3e: /* fbstp */
6095                 gen_helper_fbst_ST0(cpu_env, s->A0);
6096                 gen_helper_fpop(cpu_env);
6097                 break;
6098             case 0x3d: /* fildll */
6099                 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6100                 gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6101                 break;
6102             case 0x3f: /* fistpll */
6103                 gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6104                 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6105                 gen_helper_fpop(cpu_env);
6106                 break;
6107             default:
6108                 goto unknown_op;
6109             }
6110         } else {
6111             /* register float ops */
6112             opreg = rm;
6113 
6114             switch(op) {
6115             case 0x08: /* fld sti */
6116                 gen_helper_fpush(cpu_env);
6117                 gen_helper_fmov_ST0_STN(cpu_env,
6118                                         tcg_const_i32((opreg + 1) & 7));
6119                 break;
6120             case 0x09: /* fxchg sti */
6121             case 0x29: /* fxchg4 sti, undocumented op */
6122             case 0x39: /* fxchg7 sti, undocumented op */
6123                 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6124                 break;
6125             case 0x0a: /* grp d9/2 */
6126                 switch(rm) {
6127                 case 0: /* fnop */
6128                     /* check exceptions (FreeBSD FPU probe) */
6129                     gen_helper_fwait(cpu_env);
6130                     break;
6131                 default:
6132                     goto unknown_op;
6133                 }
6134                 break;
6135             case 0x0c: /* grp d9/4 */
6136                 switch(rm) {
6137                 case 0: /* fchs */
6138                     gen_helper_fchs_ST0(cpu_env);
6139                     break;
6140                 case 1: /* fabs */
6141                     gen_helper_fabs_ST0(cpu_env);
6142                     break;
6143                 case 4: /* ftst */
6144                     gen_helper_fldz_FT0(cpu_env);
6145                     gen_helper_fcom_ST0_FT0(cpu_env);
6146                     break;
6147                 case 5: /* fxam */
6148                     gen_helper_fxam_ST0(cpu_env);
6149                     break;
6150                 default:
6151                     goto unknown_op;
6152                 }
6153                 break;
6154             case 0x0d: /* grp d9/5 */
6155                 {
6156                     switch(rm) {
6157                     case 0:
6158                         gen_helper_fpush(cpu_env);
6159                         gen_helper_fld1_ST0(cpu_env);
6160                         break;
6161                     case 1:
6162                         gen_helper_fpush(cpu_env);
6163                         gen_helper_fldl2t_ST0(cpu_env);
6164                         break;
6165                     case 2:
6166                         gen_helper_fpush(cpu_env);
6167                         gen_helper_fldl2e_ST0(cpu_env);
6168                         break;
6169                     case 3:
6170                         gen_helper_fpush(cpu_env);
6171                         gen_helper_fldpi_ST0(cpu_env);
6172                         break;
6173                     case 4:
6174                         gen_helper_fpush(cpu_env);
6175                         gen_helper_fldlg2_ST0(cpu_env);
6176                         break;
6177                     case 5:
6178                         gen_helper_fpush(cpu_env);
6179                         gen_helper_fldln2_ST0(cpu_env);
6180                         break;
6181                     case 6:
6182                         gen_helper_fpush(cpu_env);
6183                         gen_helper_fldz_ST0(cpu_env);
6184                         break;
6185                     default:
6186                         goto unknown_op;
6187                     }
6188                 }
6189                 break;
6190             case 0x0e: /* grp d9/6 */
6191                 switch(rm) {
6192                 case 0: /* f2xm1 */
6193                     gen_helper_f2xm1(cpu_env);
6194                     break;
6195                 case 1: /* fyl2x */
6196                     gen_helper_fyl2x(cpu_env);
6197                     break;
6198                 case 2: /* fptan */
6199                     gen_helper_fptan(cpu_env);
6200                     break;
6201                 case 3: /* fpatan */
6202                     gen_helper_fpatan(cpu_env);
6203                     break;
6204                 case 4: /* fxtract */
6205                     gen_helper_fxtract(cpu_env);
6206                     break;
6207                 case 5: /* fprem1 */
6208                     gen_helper_fprem1(cpu_env);
6209                     break;
6210                 case 6: /* fdecstp */
6211                     gen_helper_fdecstp(cpu_env);
6212                     break;
6213                 default:
6214                 case 7: /* fincstp */
6215                     gen_helper_fincstp(cpu_env);
6216                     break;
6217                 }
6218                 break;
6219             case 0x0f: /* grp d9/7 */
6220                 switch(rm) {
6221                 case 0: /* fprem */
6222                     gen_helper_fprem(cpu_env);
6223                     break;
6224                 case 1: /* fyl2xp1 */
6225                     gen_helper_fyl2xp1(cpu_env);
6226                     break;
6227                 case 2: /* fsqrt */
6228                     gen_helper_fsqrt(cpu_env);
6229                     break;
6230                 case 3: /* fsincos */
6231                     gen_helper_fsincos(cpu_env);
6232                     break;
6233                 case 5: /* fscale */
6234                     gen_helper_fscale(cpu_env);
6235                     break;
6236                 case 4: /* frndint */
6237                     gen_helper_frndint(cpu_env);
6238                     break;
6239                 case 6: /* fsin */
6240                     gen_helper_fsin(cpu_env);
6241                     break;
6242                 default:
6243                 case 7: /* fcos */
6244                     gen_helper_fcos(cpu_env);
6245                     break;
6246                 }
6247                 break;
6248             case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6249             case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6250             case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6251                 {
6252                     int op1;
6253 
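                         /*
                          * op1 (op & 7) selects the arithmetic op; the 0x20+
                          * forms write ST(i) instead of ST0, and the 0x30+
                          * forms also pop.
                          */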
6254                     op1 = op & 7;
6255                     if (op >= 0x20) {
6256                         gen_helper_fp_arith_STN_ST0(op1, opreg);
6257                         if (op >= 0x30)
6258                             gen_helper_fpop(cpu_env);
6259                     } else {
6260                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6261                         gen_helper_fp_arith_ST0_FT0(op1);
6262                     }
6263                 }
6264                 break;
6265             case 0x02: /* fcom */
6266             case 0x22: /* fcom2, undocumented op */
6267                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6268                 gen_helper_fcom_ST0_FT0(cpu_env);
6269                 break;
6270             case 0x03: /* fcomp */
6271             case 0x23: /* fcomp3, undocumented op */
6272             case 0x32: /* fcomp5, undocumented op */
6273                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6274                 gen_helper_fcom_ST0_FT0(cpu_env);
6275                 gen_helper_fpop(cpu_env);
6276                 break;
6277             case 0x15: /* da/5 */
6278                 switch(rm) {
6279                 case 1: /* fucompp */
6280                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6281                     gen_helper_fucom_ST0_FT0(cpu_env);
6282                     gen_helper_fpop(cpu_env);
6283                     gen_helper_fpop(cpu_env);
6284                     break;
6285                 default:
6286                     goto unknown_op;
6287                 }
6288                 break;
6289             case 0x1c:
6290                 switch(rm) {
6291                 case 0: /* feni (287 only, just do nop here) */
6292                     break;
6293                 case 1: /* fdisi (287 only, just do nop here) */
6294                     break;
6295                 case 2: /* fclex */
6296                     gen_helper_fclex(cpu_env);
6297                     break;
6298                 case 3: /* fninit */
6299                     gen_helper_fninit(cpu_env);
6300                     break;
6301                 case 4: /* fsetpm (287 only, just do nop here) */
6302                     break;
6303                 default:
6304                     goto unknown_op;
6305                 }
6306                 break;
6307             case 0x1d: /* fucomi */
6308                 if (!(s->cpuid_features & CPUID_CMOV)) {
6309                     goto illegal_op;
6310                 }
6311                 gen_update_cc_op(s);
6312                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6313                 gen_helper_fucomi_ST0_FT0(cpu_env);
6314                 set_cc_op(s, CC_OP_EFLAGS);
6315                 break;
6316             case 0x1e: /* fcomi */
6317                 if (!(s->cpuid_features & CPUID_CMOV)) {
6318                     goto illegal_op;
6319                 }
6320                 gen_update_cc_op(s);
6321                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6322                 gen_helper_fcomi_ST0_FT0(cpu_env);
6323                 set_cc_op(s, CC_OP_EFLAGS);
6324                 break;
6325             case 0x28: /* ffree sti */
6326                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6327                 break;
6328             case 0x2a: /* fst sti */
6329                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6330                 break;
6331             case 0x2b: /* fstp sti */
6332             case 0x0b: /* fstp1 sti, undocumented op */
6333             case 0x3a: /* fstp8 sti, undocumented op */
6334             case 0x3b: /* fstp9 sti, undocumented op */
6335                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6336                 gen_helper_fpop(cpu_env);
6337                 break;
6338             case 0x2c: /* fucom st(i) */
6339                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6340                 gen_helper_fucom_ST0_FT0(cpu_env);
6341                 break;
6342             case 0x2d: /* fucomp st(i) */
6343                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6344                 gen_helper_fucom_ST0_FT0(cpu_env);
6345                 gen_helper_fpop(cpu_env);
6346                 break;
6347             case 0x33: /* de/3 */
6348                 switch(rm) {
6349                 case 1: /* fcompp */
6350                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6351                     gen_helper_fcom_ST0_FT0(cpu_env);
6352                     gen_helper_fpop(cpu_env);
6353                     gen_helper_fpop(cpu_env);
6354                     break;
6355                 default:
6356                     goto unknown_op;
6357                 }
6358                 break;
6359             case 0x38: /* ffreep sti, undocumented op */
6360                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6361                 gen_helper_fpop(cpu_env);
6362                 break;
6363             case 0x3c: /* df/4 */
6364                 switch(rm) {
6365                 case 0:
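                     /* fnstsw %ax: store the FPU status word into AX */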
6366                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6367                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6368                     gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6369                     break;
6370                 default:
6371                     goto unknown_op;
6372                 }
6373                 break;
6374             case 0x3d: /* fucomip */
6375                 if (!(s->cpuid_features & CPUID_CMOV)) {
6376                     goto illegal_op;
6377                 }
6378                 gen_update_cc_op(s);
6379                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6380                 gen_helper_fucomi_ST0_FT0(cpu_env);
6381                 gen_helper_fpop(cpu_env);
6382                 set_cc_op(s, CC_OP_EFLAGS);
6383                 break;
6384             case 0x3e: /* fcomip */
6385                 if (!(s->cpuid_features & CPUID_CMOV)) {
6386                     goto illegal_op;
6387                 }
6388                 gen_update_cc_op(s);
6389                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6390                 gen_helper_fcomi_ST0_FT0(cpu_env);
6391                 gen_helper_fpop(cpu_env);
6392                 set_cc_op(s, CC_OP_EFLAGS);
6393                 break;
6394             case 0x10 ... 0x13: /* fcmovxx */
6395             case 0x18 ... 0x1b:
6396                 {
6397                     int op1;
6398                     TCGLabel *l1;
6399                     static const uint8_t fcmov_cc[8] = {
6400                         (JCC_B << 1),
6401                         (JCC_Z << 1),
6402                         (JCC_BE << 1),
6403                         (JCC_P << 1),
6404                     };
6405 
6406                     if (!(s->cpuid_features & CPUID_CMOV)) {
6407                         goto illegal_op;
6408                     }
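                         /*
                          * fcmov_cc maps op & 3 to B/E/BE/U; bit 3 of op selects
                          * the negated form.  op1 carries the inverted sense, so
                          * the branch skips the fmov when the condition is false.
                          */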
6409                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6410                     l1 = gen_new_label();
6411                     gen_jcc1_noeob(s, op1, l1);
6412                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6413                     gen_set_label(l1);
6414                 }
6415                 break;
6416             default:
6417                 goto unknown_op;
6418             }
6419         }
6420         break;
6421         /************************/
6422         /* string ops */
6423 
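     /*
      * With a REP/REPNE prefix the gen_repz_* helpers emit the whole
      * loop; for SCAS/CMPS the last argument (1 = REPNE, 0 = REPE)
      * selects the ZF termination test.
      */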
6424     case 0xa4: /* movsS */
6425     case 0xa5:
6426         ot = mo_b_d(b, dflag);
6427         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6428             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6429         } else {
6430             gen_movs(s, ot);
6431         }
6432         break;
6433 
6434     case 0xaa: /* stosS */
6435     case 0xab:
6436         ot = mo_b_d(b, dflag);
6437         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6438             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6439         } else {
6440             gen_stos(s, ot);
6441         }
6442         break;
6443     case 0xac: /* lodsS */
6444     case 0xad:
6445         ot = mo_b_d(b, dflag);
6446         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6447             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6448         } else {
6449             gen_lods(s, ot);
6450         }
6451         break;
6452     case 0xae: /* scasS */
6453     case 0xaf:
6454         ot = mo_b_d(b, dflag);
6455         if (prefixes & PREFIX_REPNZ) {
6456             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6457         } else if (prefixes & PREFIX_REPZ) {
6458             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6459         } else {
6460             gen_scas(s, ot);
6461         }
6462         break;
6463 
6464     case 0xa6: /* cmpsS */
6465     case 0xa7:
6466         ot = mo_b_d(b, dflag);
6467         if (prefixes & PREFIX_REPNZ) {
6468             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6469         } else if (prefixes & PREFIX_REPZ) {
6470             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6471         } else {
6472             gen_cmps(s, ot);
6473         }
6474         break;
6475     case 0x6c: /* insS */
6476     case 0x6d:
6477         ot = mo_b_d32(b, dflag);
6478         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6479         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6480         if (!gen_check_io(s, ot, s->tmp2_i32,
6481                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6482             break;
6483         }
6484         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6485             gen_io_start();
6486         }
6487         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6488             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6489             /* jump generated by gen_repz_ins */
6490         } else {
6491             gen_ins(s, ot);
6492             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6493                 gen_jmp(s, s->pc - s->cs_base);
6494             }
6495         }
6496         break;
6497     case 0x6e: /* outsS */
6498     case 0x6f:
6499         ot = mo_b_d32(b, dflag);
6500         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6501         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6502         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6503             break;
6504         }
6505         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6506             gen_io_start();
6507         }
6508         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6509             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6510             /* jump generated by gen_repz_outs */
6511         } else {
6512             gen_outs(s, ot);
6513             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6514                 gen_jmp(s, s->pc - s->cs_base);
6515             }
6516         }
6517         break;
6518 
6519         /************************/
6520         /* port I/O */
6521 
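     /*
      * IN/OUT: 0xe4-0xe7 take an imm8 port, 0xec-0xef use DX.
      * gen_check_io performs the I/O permission and SVM IOIO
      * intercept checks.
      */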
6522     case 0xe4:
6523     case 0xe5:
6524         ot = mo_b_d32(b, dflag);
6525         val = x86_ldub_code(env, s);
6526         tcg_gen_movi_i32(s->tmp2_i32, val);
6527         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6528             break;
6529         }
6530         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6531             gen_io_start();
6532         }
6533         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6534         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6535         gen_bpt_io(s, s->tmp2_i32, ot);
6536         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6537             gen_jmp(s, s->pc - s->cs_base);
6538         }
6539         break;
6540     case 0xe6:
6541     case 0xe7:
6542         ot = mo_b_d32(b, dflag);
6543         val = x86_ldub_code(env, s);
6544         tcg_gen_movi_i32(s->tmp2_i32, val);
6545         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6546             break;
6547         }
6548         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6549             gen_io_start();
6550         }
6551         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6552         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6553         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6554         gen_bpt_io(s, s->tmp2_i32, ot);
6555         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6556             gen_jmp(s, s->pc - s->cs_base);
6557         }
6558         break;
6559     case 0xec:
6560     case 0xed:
6561         ot = mo_b_d32(b, dflag);
6562         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6563         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6564         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6565             break;
6566         }
6567         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6568             gen_io_start();
6569         }
6570         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6571         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6572         gen_bpt_io(s, s->tmp2_i32, ot);
6573         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6574             gen_jmp(s, s->pc - s->cs_base);
6575         }
6576         break;
6577     case 0xee:
6578     case 0xef:
6579         ot = mo_b_d32(b, dflag);
6580         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6581         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6582         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6583             break;
6584         }
6585         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6586             gen_io_start();
6587         }
6588         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6589         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6590         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6591         gen_bpt_io(s, s->tmp2_i32, ot);
6592         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6593             gen_jmp(s, s->pc - s->cs_base);
6594         }
6595         break;
6596 
6597         /************************/
6598         /* control */
6599     case 0xc2: /* ret im */
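         /* the imm16 counts extra bytes to release in addition to the
            return address */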
6600         val = x86_ldsw_code(env, s);
6601         ot = gen_pop_T0(s);
6602         gen_stack_update(s, val + (1 << ot));
6603         /* Note that gen_pop_T0 uses a zero-extending load.  */
6604         gen_op_jmp_v(s->T0);
6605         gen_bnd_jmp(s);
6606         gen_jr(s, s->T0);
6607         break;
6608     case 0xc3: /* ret */
6609         ot = gen_pop_T0(s);
6610         gen_pop_update(s, ot);
6611         /* Note that gen_pop_T0 uses a zero-extending load.  */
6612         gen_op_jmp_v(s->T0);
6613         gen_bnd_jmp(s);
6614         gen_jr(s, s->T0);
6615         break;
6616     case 0xca: /* lret im */
6617         val = x86_ldsw_code(env, s);
6618     do_lret:
6619         if (PE(s) && !VM86(s)) {
6620             gen_update_cc_op(s);
6621             gen_jmp_im(s, pc_start - s->cs_base);
6622             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6623                                       tcg_const_i32(val));
6624         } else {
6625             gen_stack_A0(s);
6626             /* pop offset */
6627             gen_op_ld_v(s, dflag, s->T0, s->A0);
6628             /* NOTE: keeping EIP updated here is not a problem even if
6629                an exception is raised afterwards */
6630             gen_op_jmp_v(s->T0);
6631             /* pop selector */
6632             gen_add_A0_im(s, 1 << dflag);
6633             gen_op_ld_v(s, dflag, s->T0, s->A0);
6634             gen_op_movl_seg_T0_vm(s, R_CS);
6635             /* add stack offset */
6636             gen_stack_update(s, val + (2 << dflag));
6637         }
6638         gen_eob(s);
6639         break;
6640     case 0xcb: /* lret */
6641         val = 0;
6642         goto do_lret;
6643     case 0xcf: /* iret */
6644         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6645         if (!PE(s) || VM86(s)) {
6646             /* real mode or vm86 mode */
6647             if (!check_vm86_iopl(s)) {
6648                 break;
6649             }
6650             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6651         } else {
6652             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6653                                       tcg_const_i32(s->pc - s->cs_base));
6654         }
6655         set_cc_op(s, CC_OP_EFLAGS);
6656         gen_eob(s);
6657         break;
6658     case 0xe8: /* call im */
6659         {
6660             if (dflag != MO_16) {
6661                 tval = (int32_t)insn_get(env, s, MO_32);
6662             } else {
6663                 tval = (int16_t)insn_get(env, s, MO_16);
6664             }
6665             next_eip = s->pc - s->cs_base;
6666             tval += next_eip;
6667             if (dflag == MO_16) {
6668                 tval &= 0xffff;
6669             } else if (!CODE64(s)) {
6670                 tval &= 0xffffffff;
6671             }
6672             tcg_gen_movi_tl(s->T0, next_eip);
6673             gen_push_v(s, s->T0);
6674             gen_bnd_jmp(s);
6675             gen_jmp(s, tval);
6676         }
6677         break;
6678     case 0x9a: /* lcall im */
6679         {
6680             unsigned int selector, offset;
6681 
6682             if (CODE64(s))
6683                 goto illegal_op;
6684             ot = dflag;
6685             offset = insn_get(env, s, ot);
6686             selector = insn_get(env, s, MO_16);
6687 
6688             tcg_gen_movi_tl(s->T0, selector);
6689             tcg_gen_movi_tl(s->T1, offset);
6690         }
6691         goto do_lcall;
6692     case 0xe9: /* jmp im */
6693         if (dflag != MO_16) {
6694             tval = (int32_t)insn_get(env, s, MO_32);
6695         } else {
6696             tval = (int16_t)insn_get(env, s, MO_16);
6697         }
6698         tval += s->pc - s->cs_base;
6699         if (dflag == MO_16) {
6700             tval &= 0xffff;
6701         } else if (!CODE64(s)) {
6702             tval &= 0xffffffff;
6703         }
6704         gen_bnd_jmp(s);
6705         gen_jmp(s, tval);
6706         break;
6707     case 0xea: /* ljmp im */
6708         {
6709             unsigned int selector, offset;
6710 
6711             if (CODE64(s))
6712                 goto illegal_op;
6713             ot = dflag;
6714             offset = insn_get(env, s, ot);
6715             selector = insn_get(env, s, MO_16);
6716 
6717             tcg_gen_movi_tl(s->T0, selector);
6718             tcg_gen_movi_tl(s->T1, offset);
6719         }
6720         goto do_ljmp;
6721     case 0xeb: /* jmp Jb */
6722         tval = (int8_t)insn_get(env, s, MO_8);
6723         tval += s->pc - s->cs_base;
6724         if (dflag == MO_16) {
6725             tval &= 0xffff;
6726         }
6727         gen_jmp(s, tval);
6728         break;
6729     case 0x70 ... 0x7f: /* jcc Jb */
6730         tval = (int8_t)insn_get(env, s, MO_8);
6731         goto do_jcc;
6732     case 0x180 ... 0x18f: /* jcc Jv */
6733         if (dflag != MO_16) {
6734             tval = (int32_t)insn_get(env, s, MO_32);
6735         } else {
6736             tval = (int16_t)insn_get(env, s, MO_16);
6737         }
6738     do_jcc:
6739         next_eip = s->pc - s->cs_base;
6740         tval += next_eip;
6741         if (dflag == MO_16) {
6742             tval &= 0xffff;
6743         }
6744         gen_bnd_jmp(s);
6745         gen_jcc(s, b, tval, next_eip);
6746         break;
6747 
6748     case 0x190 ... 0x19f: /* setcc Gv */
6749         modrm = x86_ldub_code(env, s);
6750         gen_setcc1(s, b, s->T0);
6751         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6752         break;
6753     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6754         if (!(s->cpuid_features & CPUID_CMOV)) {
6755             goto illegal_op;
6756         }
6757         ot = dflag;
6758         modrm = x86_ldub_code(env, s);
6759         reg = ((modrm >> 3) & 7) | REX_R(s);
6760         gen_cmovcc1(env, s, ot, b, modrm, reg);
6761         break;
6762 
6763         /************************/
6764         /* flags */
6765     case 0x9c: /* pushf */
6766         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6767         if (check_vm86_iopl(s)) {
6768             gen_update_cc_op(s);
6769             gen_helper_read_eflags(s->T0, cpu_env);
6770             gen_push_v(s, s->T0);
6771         }
6772         break;
6773     case 0x9d: /* popf */
6774         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6775         if (check_vm86_iopl(s)) {
6776             ot = gen_pop_T0(s);
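             /*
              * The mask of writable system bits depends on privilege:
              * CPL 0 may also change IF and IOPL, CPL <= IOPL may change
              * IF, anyone may change TF/AC/ID/NT (and the arithmetic
              * flags, which the helper always updates).
              */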
6777             if (CPL(s) == 0) {
6778                 if (dflag != MO_16) {
6779                     gen_helper_write_eflags(cpu_env, s->T0,
6780                                             tcg_const_i32((TF_MASK | AC_MASK |
6781                                                            ID_MASK | NT_MASK |
6782                                                            IF_MASK |
6783                                                            IOPL_MASK)));
6784                 } else {
6785                     gen_helper_write_eflags(cpu_env, s->T0,
6786                                             tcg_const_i32((TF_MASK | AC_MASK |
6787                                                            ID_MASK | NT_MASK |
6788                                                            IF_MASK | IOPL_MASK)
6789                                                           & 0xffff));
6790                 }
6791             } else {
6792                 if (CPL(s) <= IOPL(s)) {
6793                     if (dflag != MO_16) {
6794                         gen_helper_write_eflags(cpu_env, s->T0,
6795                                                 tcg_const_i32((TF_MASK |
6796                                                                AC_MASK |
6797                                                                ID_MASK |
6798                                                                NT_MASK |
6799                                                                IF_MASK)));
6800                     } else {
6801                         gen_helper_write_eflags(cpu_env, s->T0,
6802                                                 tcg_const_i32((TF_MASK |
6803                                                                AC_MASK |
6804                                                                ID_MASK |
6805                                                                NT_MASK |
6806                                                                IF_MASK)
6807                                                               & 0xffff));
6808                     }
6809                 } else {
6810                     if (dflag != MO_16) {
6811                         gen_helper_write_eflags(cpu_env, s->T0,
6812                                            tcg_const_i32((TF_MASK | AC_MASK |
6813                                                           ID_MASK | NT_MASK)));
6814                     } else {
6815                         gen_helper_write_eflags(cpu_env, s->T0,
6816                                            tcg_const_i32((TF_MASK | AC_MASK |
6817                                                           ID_MASK | NT_MASK)
6818                                                          & 0xffff));
6819                     }
6820                 }
6821             }
6822             gen_pop_update(s, ot);
6823             set_cc_op(s, CC_OP_EFLAGS);
6824             /* abort translation because TF/AC flag may change */
6825             gen_jmp_im(s, s->pc - s->cs_base);
6826             gen_eob(s);
6827         }
6828         break;
6829     case 0x9e: /* sahf */
6830         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6831             goto illegal_op;
6832         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6833         gen_compute_eflags(s);
6834         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6835         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6836         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6837         break;
6838     case 0x9f: /* lahf */
6839         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6840             goto illegal_op;
6841         gen_compute_eflags(s);
6842         /* Note: gen_compute_eflags() only gives the condition codes */
6843         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6844         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6845         break;
6846     case 0xf5: /* cmc */
6847         gen_compute_eflags(s);
6848         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6849         break;
6850     case 0xf8: /* clc */
6851         gen_compute_eflags(s);
6852         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6853         break;
6854     case 0xf9: /* stc */
6855         gen_compute_eflags(s);
6856         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6857         break;
6858     case 0xfc: /* cld */
6859         tcg_gen_movi_i32(s->tmp2_i32, 1);
6860         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6861         break;
6862     case 0xfd: /* std */
6863         tcg_gen_movi_i32(s->tmp2_i32, -1);
6864         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6865         break;
6866 
6867         /************************/
6868         /* bit operations */
6869     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6870         ot = dflag;
6871         modrm = x86_ldub_code(env, s);
6872         op = (modrm >> 3) & 7;
6873         mod = (modrm >> 6) & 3;
6874         rm = (modrm & 7) | REX_B(s);
6875         if (mod != 3) {
6876             s->rip_offset = 1;
6877             gen_lea_modrm(env, s, modrm);
6878             if (!(s->prefix & PREFIX_LOCK)) {
6879                 gen_op_ld_v(s, ot, s->T0, s->A0);
6880             }
6881         } else {
6882             gen_op_mov_v_reg(s, ot, s->T0, rm);
6883         }
6884         /* load shift */
6885         val = x86_ldub_code(env, s);
6886         tcg_gen_movi_tl(s->T1, val);
6887         if (op < 4)
6888             goto unknown_op;
6889         op -= 4;
6890         goto bt_op;
6891     case 0x1a3: /* bt Gv, Ev */
6892         op = 0;
6893         goto do_btx;
6894     case 0x1ab: /* bts */
6895         op = 1;
6896         goto do_btx;
6897     case 0x1b3: /* btr */
6898         op = 2;
6899         goto do_btx;
6900     case 0x1bb: /* btc */
6901         op = 3;
6902     do_btx:
6903         ot = dflag;
6904         modrm = x86_ldub_code(env, s);
6905         reg = ((modrm >> 3) & 7) | REX_R(s);
6906         mod = (modrm >> 6) & 3;
6907         rm = (modrm & 7) | REX_B(s);
6908         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6909         if (mod != 3) {
6910             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6911             /* specific case: add the bit offset's word index as a displacement */
6912             gen_exts(ot, s->T1);
6913             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6914             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6915             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6916             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6917             if (!(s->prefix & PREFIX_LOCK)) {
6918                 gen_op_ld_v(s, ot, s->T0, s->A0);
6919             }
6920         } else {
6921             gen_op_mov_v_reg(s, ot, s->T0, rm);
6922         }
6923     bt_op:
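         /* reduce T1 mod the operand width; tmp0 becomes the bit mask */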
6924         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6925         tcg_gen_movi_tl(s->tmp0, 1);
6926         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6927         if (s->prefix & PREFIX_LOCK) {
6928             switch (op) {
6929             case 0: /* bt */
6930                 /* Needs no atomic ops; we suppressed the normal
6931                    memory load for LOCK above so do it now.  */
6932                 gen_op_ld_v(s, ot, s->T0, s->A0);
6933                 break;
6934             case 1: /* bts */
6935                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6936                                            s->mem_index, ot | MO_LE);
6937                 break;
6938             case 2: /* btr */
6939                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6940                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6941                                             s->mem_index, ot | MO_LE);
6942                 break;
6943             default:
6944             case 3: /* btc */
6945                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6946                                             s->mem_index, ot | MO_LE);
6947                 break;
6948             }
6949             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6950         } else {
6951             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6952             switch (op) {
6953             case 0: /* bt */
6954                 /* Data already loaded; nothing to do.  */
6955                 break;
6956             case 1: /* bts */
6957                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6958                 break;
6959             case 2: /* btr */
6960                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6961                 break;
6962             default:
6963             case 3: /* btc */
6964                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6965                 break;
6966             }
6967             if (op != 0) {
6968                 if (mod != 3) {
6969                     gen_op_st_v(s, ot, s->T0, s->A0);
6970                 } else {
6971                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6972                 }
6973             }
6974         }
6975 
6976         /* Delay all CC updates until after the store above.  Note that
6977            C is the result of the test, Z is unchanged, and the others
6978            are all undefined.  */
6979         switch (s->cc_op) {
6980         case CC_OP_MULB ... CC_OP_MULQ:
6981         case CC_OP_ADDB ... CC_OP_ADDQ:
6982         case CC_OP_ADCB ... CC_OP_ADCQ:
6983         case CC_OP_SUBB ... CC_OP_SUBQ:
6984         case CC_OP_SBBB ... CC_OP_SBBQ:
6985         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6986         case CC_OP_INCB ... CC_OP_INCQ:
6987         case CC_OP_DECB ... CC_OP_DECQ:
6988         case CC_OP_SHLB ... CC_OP_SHLQ:
6989         case CC_OP_SARB ... CC_OP_SARQ:
6990         case CC_OP_BMILGB ... CC_OP_BMILGQ:
6991             /* Z was going to be computed from the non-zero status of CC_DST.
6992                We can get that same Z value (and the new C value) by leaving
6993                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6994                same width.  */
6995             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6996             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6997             break;
6998         default:
6999             /* Otherwise, generate EFLAGS and replace the C bit.  */
7000             gen_compute_eflags(s);
7001             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7002                                ctz32(CC_C), 1);
7003             break;
7004         }
7005         break;
7006     case 0x1bc: /* bsf / tzcnt */
7007     case 0x1bd: /* bsr / lzcnt */
7008         ot = dflag;
7009         modrm = x86_ldub_code(env, s);
7010         reg = ((modrm >> 3) & 7) | REX_R(s);
7011         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7012         gen_extu(ot, s->T0);
7013 
7014         /* Note that lzcnt and tzcnt are in different extensions.  */
7015         if ((prefixes & PREFIX_REPZ)
7016             && (b & 1
7017                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7018                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7019             int size = 8 << ot;
7020             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7021             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7022             if (b & 1) {
7023                 /* For lzcnt, reduce the target_ulong result by the
7024                    number of zeros that we expect to find at the top.  */
7025                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7026                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7027             } else {
7028                 /* For tzcnt, a zero input must return the operand size.  */
7029                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7030             }
7031             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7032             gen_op_update1_cc(s);
7033             set_cc_op(s, CC_OP_BMILGB + ot);
7034         } else {
7035             /* For bsr/bsf, only the Z bit is defined and it is related
7036                to the input and not the result.  */
7037             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7038             set_cc_op(s, CC_OP_LOGICB + ot);
7039 
7040             /* ??? The manual says that the output is undefined when the
7041                input is zero, but real hardware leaves it unchanged, and
7042                real programs appear to depend on that.  Accomplish this
7043                by passing the output as the value to return upon zero.  */
7044             if (b & 1) {
7045                 /* For bsr, return the bit index of the first 1 bit,
7046                    not the count of leading zeros.  */
7047                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7048                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7049                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7050             } else {
7051                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7052             }
7053         }
7054         gen_op_mov_reg_v(s, ot, reg, s->T0);
7055         break;
7056         /************************/
7057         /* bcd */
7058     case 0x27: /* daa */
7059         if (CODE64(s))
7060             goto illegal_op;
7061         gen_update_cc_op(s);
7062         gen_helper_daa(cpu_env);
7063         set_cc_op(s, CC_OP_EFLAGS);
7064         break;
7065     case 0x2f: /* das */
7066         if (CODE64(s))
7067             goto illegal_op;
7068         gen_update_cc_op(s);
7069         gen_helper_das(cpu_env);
7070         set_cc_op(s, CC_OP_EFLAGS);
7071         break;
7072     case 0x37: /* aaa */
7073         if (CODE64(s))
7074             goto illegal_op;
7075         gen_update_cc_op(s);
7076         gen_helper_aaa(cpu_env);
7077         set_cc_op(s, CC_OP_EFLAGS);
7078         break;
7079     case 0x3f: /* aas */
7080         if (CODE64(s))
7081             goto illegal_op;
7082         gen_update_cc_op(s);
7083         gen_helper_aas(cpu_env);
7084         set_cc_op(s, CC_OP_EFLAGS);
7085         break;
7086     case 0xd4: /* aam */
7087         if (CODE64(s))
7088             goto illegal_op;
7089         val = x86_ldub_code(env, s);
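         /* AAM with a zero immediate raises #DE */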
7090         if (val == 0) {
7091             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7092         } else {
7093             gen_helper_aam(cpu_env, tcg_const_i32(val));
7094             set_cc_op(s, CC_OP_LOGICB);
7095         }
7096         break;
7097     case 0xd5: /* aad */
7098         if (CODE64(s))
7099             goto illegal_op;
7100         val = x86_ldub_code(env, s);
7101         gen_helper_aad(cpu_env, tcg_const_i32(val));
7102         set_cc_op(s, CC_OP_LOGICB);
7103         break;
7104         /************************/
7105         /* misc */
7106     case 0x90: /* nop */
7107         /* XXX: correct lock test for all insns */
7108         if (prefixes & PREFIX_LOCK) {
7109             goto illegal_op;
7110         }
7111         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7112         if (REX_B(s)) {
7113             goto do_xchg_reg_eax;
7114         }
7115         if (prefixes & PREFIX_REPZ) {
7116             gen_update_cc_op(s);
7117             gen_jmp_im(s, pc_start - s->cs_base);
7118             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7119             s->base.is_jmp = DISAS_NORETURN;
7120         }
7121         break;
7122     case 0x9b: /* fwait */
7123         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7124             (HF_MP_MASK | HF_TS_MASK)) {
7125             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7126         } else {
7127             gen_helper_fwait(cpu_env);
7128         }
7129         break;
7130     case 0xcc: /* int3 */
7131         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7132         break;
7133     case 0xcd: /* int N */
7134         val = x86_ldub_code(env, s);
7135         if (check_vm86_iopl(s)) {
7136             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7137         }
7138         break;
7139     case 0xce: /* into */
7140         if (CODE64(s))
7141             goto illegal_op;
7142         gen_update_cc_op(s);
7143         gen_jmp_im(s, pc_start - s->cs_base);
7144         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7145         break;
7146 #ifdef WANT_ICEBP
7147     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7148         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7149         gen_debug(s);
7150         break;
7151 #endif
7152     case 0xfa: /* cli */
7153         if (check_iopl(s)) {
7154             gen_helper_cli(cpu_env);
7155         }
7156         break;
7157     case 0xfb: /* sti */
7158         if (check_iopl(s)) {
7159             gen_helper_sti(cpu_env);
7160             /* interrupts are not recognized until after the insn following STI */
7161             gen_jmp_im(s, s->pc - s->cs_base);
7162             gen_eob_inhibit_irq(s, true);
7163         }
7164         break;
7165     case 0x62: /* bound */
7166         if (CODE64(s))
7167             goto illegal_op;
7168         ot = dflag;
7169         modrm = x86_ldub_code(env, s);
7170         reg = (modrm >> 3) & 7;
7171         mod = (modrm >> 6) & 3;
7172         if (mod == 3)
7173             goto illegal_op;
7174         gen_op_mov_v_reg(s, ot, s->T0, reg);
7175         gen_lea_modrm(env, s, modrm);
7176         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7177         if (ot == MO_16) {
7178             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7179         } else {
7180             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7181         }
7182         break;
7183     case 0x1c8 ... 0x1cf: /* bswap reg */
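         /*
          * The 64-bit form swaps the whole register; the 32-bit form
          * zero-extends the swapped value (TCG_BSWAP_OZ), matching the
          * zeroing of the high half on 32-bit register writes.
          */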
7184         reg = (b & 7) | REX_B(s);
7185 #ifdef TARGET_X86_64
7186         if (dflag == MO_64) {
7187             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7188             break;
7189         }
7190 #endif
7191         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7192         break;
7193     case 0xd6: /* salc */
7194         if (CODE64(s))
7195             goto illegal_op;
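         /* undocumented: set AL to 0xff if CF is set, 0x00 otherwise */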
7196         gen_compute_eflags_c(s, s->T0);
7197         tcg_gen_neg_tl(s->T0, s->T0);
7198         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7199         break;
7200     case 0xe0: /* loopnz */
7201     case 0xe1: /* loopz */
7202     case 0xe2: /* loop */
7203     case 0xe3: /* jecxz */
7204         {
7205             TCGLabel *l1, *l2, *l3;
7206 
7207             tval = (int8_t)insn_get(env, s, MO_8);
7208             next_eip = s->pc - s->cs_base;
7209             tval += next_eip;
7210             if (dflag == MO_16) {
7211                 tval &= 0xffff;
7212             }
7213 
7214             l1 = gen_new_label();
7215             l2 = gen_new_label();
7216             l3 = gen_new_label();
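             /*
              * l1: branch taken, jump to tval; l3: loopnz/loopz exit when
              * ECX hits zero, falls through to next_eip; l2: common end.
              */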
7217             gen_update_cc_op(s);
7218             b &= 3;
7219             switch(b) {
7220             case 0: /* loopnz */
7221             case 1: /* loopz */
7222                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7223                 gen_op_jz_ecx(s, s->aflag, l3);
7224                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7225                 break;
7226             case 2: /* loop */
7227                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7228                 gen_op_jnz_ecx(s, s->aflag, l1);
7229                 break;
7230             default:
7231             case 3: /* jcxz */
7232                 gen_op_jz_ecx(s, s->aflag, l1);
7233                 break;
7234             }
7235 
7236             gen_set_label(l3);
7237             gen_jmp_im(s, next_eip);
7238             tcg_gen_br(l2);
7239 
7240             gen_set_label(l1);
7241             gen_jmp_im(s, tval);
7242             gen_set_label(l2);
7243             gen_eob(s);
7244         }
7245         break;
7246     case 0x130: /* wrmsr */
7247     case 0x132: /* rdmsr */
7248         if (check_cpl0(s)) {
7249             gen_update_cc_op(s);
7250             gen_jmp_im(s, pc_start - s->cs_base);
7251             if (b & 2) {
7252                 gen_helper_rdmsr(cpu_env);
7253             } else {
7254                 gen_helper_wrmsr(cpu_env);
7255                 gen_jmp_im(s, s->pc - s->cs_base);
7256                 gen_eob(s);
7257             }
7258         }
7259         break;
7260     case 0x131: /* rdtsc */
7261         gen_update_cc_op(s);
7262         gen_jmp_im(s, pc_start - s->cs_base);
7263         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7264             gen_io_start();
7265         }
7266         gen_helper_rdtsc(cpu_env);
7267         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7268             gen_jmp(s, s->pc - s->cs_base);
7269         }
7270         break;
7271     case 0x133: /* rdpmc */
7272         gen_update_cc_op(s);
7273         gen_jmp_im(s, pc_start - s->cs_base);
7274         gen_helper_rdpmc(cpu_env);
7275         s->base.is_jmp = DISAS_NORETURN;
7276         break;
7277     case 0x134: /* sysenter */
7278         /* On Intel CPUs, SYSENTER remains valid in 64-bit mode */
7279         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7280             goto illegal_op;
7281         if (!PE(s)) {
7282             gen_exception_gpf(s);
7283         } else {
7284             gen_helper_sysenter(cpu_env);
7285             gen_eob(s);
7286         }
7287         break;
7288     case 0x135: /* sysexit */
7289         /* For Intel SYSEXIT is valid on 64-bit */
7290         /* On Intel CPUs, SYSEXIT remains valid in 64-bit mode */
7291             goto illegal_op;
7292         if (!PE(s)) {
7293             gen_exception_gpf(s);
7294         } else {
7295             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7296             gen_eob(s);
7297         }
7298         break;
7299 #ifdef TARGET_X86_64
7300     case 0x105: /* syscall */
7301         /* XXX: is it usable in real mode? */
7302         gen_update_cc_op(s);
7303         gen_jmp_im(s, pc_start - s->cs_base);
7304         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7305         /* TF handling for the syscall insn is different. The TF bit is
7306            checked after the syscall insn completes. This allows #DB not to
7307            be generated after one has entered CPL0 if TF is set in FMASK. */
7308         gen_eob_worker(s, false, true);
7309         break;
7310     case 0x107: /* sysret */
7311         if (!PE(s)) {
7312             gen_exception_gpf(s);
7313         } else {
7314             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7315             /* condition codes are modified only in long mode */
7316             if (LMA(s)) {
7317                 set_cc_op(s, CC_OP_EFLAGS);
7318             }
7319             /* TF handling for the sysret insn is different. The TF bit is
7320                checked after the sysret insn completes. This allows #DB to be
7321                generated "as if" the syscall insn in userspace has just
7322                completed.  */
7323             gen_eob_worker(s, false, true);
7324         }
7325         break;
7326 #endif
7327     case 0x1a2: /* cpuid */
7328         gen_update_cc_op(s);
7329         gen_jmp_im(s, pc_start - s->cs_base);
7330         gen_helper_cpuid(cpu_env);
7331         break;
7332     case 0xf4: /* hlt */
7333         if (check_cpl0(s)) {
7334             gen_update_cc_op(s);
7335             gen_jmp_im(s, pc_start - s->cs_base);
7336             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7337             s->base.is_jmp = DISAS_NORETURN;
7338         }
7339         break;
7340     case 0x100:
7341         modrm = x86_ldub_code(env, s);
7342         mod = (modrm >> 6) & 3;
7343         op = (modrm >> 3) & 7;
7344         switch(op) {
7345         case 0: /* sldt */
7346             if (!PE(s) || VM86(s))
7347                 goto illegal_op;
7348             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7349             tcg_gen_ld32u_tl(s->T0, cpu_env,
7350                              offsetof(CPUX86State, ldt.selector));
7351             ot = mod == 3 ? dflag : MO_16;
7352             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7353             break;
7354         case 2: /* lldt */
7355             if (!PE(s) || VM86(s))
7356                 goto illegal_op;
7357             if (check_cpl0(s)) {
7358                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7359                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7360                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7361                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7362             }
7363             break;
7364         case 1: /* str */
7365             if (!PE(s) || VM86(s))
7366                 goto illegal_op;
7367             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7368             tcg_gen_ld32u_tl(s->T0, cpu_env,
7369                              offsetof(CPUX86State, tr.selector));
7370             ot = mod == 3 ? dflag : MO_16;
7371             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7372             break;
7373         case 3: /* ltr */
7374             if (!PE(s) || VM86(s))
7375                 goto illegal_op;
7376             if (check_cpl0(s)) {
7377                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7378                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7379                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7380                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7381             }
7382             break;
7383         case 4: /* verr */
7384         case 5: /* verw */
7385             if (!PE(s) || VM86(s))
7386                 goto illegal_op;
7387             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7388             gen_update_cc_op(s);
7389             if (op == 4) {
7390                 gen_helper_verr(cpu_env, s->T0);
7391             } else {
7392                 gen_helper_verw(cpu_env, s->T0);
7393             }
7394             set_cc_op(s, CC_OP_EFLAGS);
7395             break;
7396         default:
7397             goto unknown_op;
7398         }
7399         break;
7400 
7401     case 0x101:
7402         modrm = x86_ldub_code(env, s);
7403         switch (modrm) {
7404         CASE_MODRM_MEM_OP(0): /* sgdt */
7405             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7406             gen_lea_modrm(env, s, modrm);
7407             tcg_gen_ld32u_tl(s->T0,
7408                              cpu_env, offsetof(CPUX86State, gdt.limit));
7409             gen_op_st_v(s, MO_16, s->T0, s->A0);
7410             gen_add_A0_im(s, 2);
7411             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7412             if (dflag == MO_16) {
7413                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7414             }
7415             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7416             break;
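        /*
         * The stores above write the SGDT memory image: a 16-bit limit
         * at offset 0 followed by the base at offset 2.  CODE64(s) +
         * MO_32 evaluates to MO_64 in long mode and MO_32 otherwise,
         * and with a 16-bit operand size only the low 24 bits of the
         * base are kept (the 0xffffff mask above).
         */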
7417 
7418         case 0xc8: /* monitor */
7419             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7420                 goto illegal_op;
7421             }
7422             gen_update_cc_op(s);
7423             gen_jmp_im(s, pc_start - s->cs_base);
7424             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7425             gen_extu(s->aflag, s->A0);
7426             gen_add_A0_ds_seg(s);
7427             gen_helper_monitor(cpu_env, s->A0);
7428             break;
7429 
7430         case 0xc9: /* mwait */
7431             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7432                 goto illegal_op;
7433             }
7434             gen_update_cc_op(s);
7435             gen_jmp_im(s, pc_start - s->cs_base);
7436             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7437             s->base.is_jmp = DISAS_NORETURN;
7438             break;
7439 
7440         case 0xca: /* clac */
7441             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7442                 || CPL(s) != 0) {
7443                 goto illegal_op;
7444             }
7445             gen_helper_clac(cpu_env);
7446             gen_jmp_im(s, s->pc - s->cs_base);
7447             gen_eob(s);
7448             break;
7449 
7450         case 0xcb: /* stac */
7451             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7452                 || CPL(s) != 0) {
7453                 goto illegal_op;
7454             }
7455             gen_helper_stac(cpu_env);
7456             gen_jmp_im(s, s->pc - s->cs_base);
7457             gen_eob(s);
7458             break;
7459 
7460         CASE_MODRM_MEM_OP(1): /* sidt */
7461             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7462             gen_lea_modrm(env, s, modrm);
7463             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7464             gen_op_st_v(s, MO_16, s->T0, s->A0);
7465             gen_add_A0_im(s, 2);
7466             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7467             if (dflag == MO_16) {
7468                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7469             }
7470             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7471             break;
7472 
7473         case 0xd0: /* xgetbv */
7474             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7475                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7476                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7477                 goto illegal_op;
7478             }
7479             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7480             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7481             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7482             break;
7483 
7484         case 0xd1: /* xsetbv */
7485             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7486                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7487                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7488                 goto illegal_op;
7489             }
7490             if (!check_cpl0(s)) {
7491                 break;
7492             }
7493             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7494                                   cpu_regs[R_EDX]);
7495             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7496             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7497             /* End TB because translation flags may change.  */
7498             gen_jmp_im(s, s->pc - s->cs_base);
7499             gen_eob(s);
7500             break;
7501 
7502         case 0xd8: /* VMRUN */
7503             if (!SVME(s) || !PE(s)) {
7504                 goto illegal_op;
7505             }
7506             if (!check_cpl0(s)) {
7507                 break;
7508             }
7509             gen_update_cc_op(s);
7510             gen_jmp_im(s, pc_start - s->cs_base);
7511             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7512                              tcg_const_i32(s->pc - pc_start));
7513             tcg_gen_exit_tb(NULL, 0);
7514             s->base.is_jmp = DISAS_NORETURN;
7515             break;
7516 
7517         case 0xd9: /* VMMCALL */
7518             if (!SVME(s)) {
7519                 goto illegal_op;
7520             }
7521             gen_update_cc_op(s);
7522             gen_jmp_im(s, pc_start - s->cs_base);
7523             gen_helper_vmmcall(cpu_env);
7524             break;
7525 
7526         case 0xda: /* VMLOAD */
7527             if (!SVME(s) || !PE(s)) {
7528                 goto illegal_op;
7529             }
7530             if (!check_cpl0(s)) {
7531                 break;
7532             }
7533             gen_update_cc_op(s);
7534             gen_jmp_im(s, pc_start - s->cs_base);
7535             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7536             break;
7537 
7538         case 0xdb: /* VMSAVE */
7539             if (!SVME(s) || !PE(s)) {
7540                 goto illegal_op;
7541             }
7542             if (!check_cpl0(s)) {
7543                 break;
7544             }
7545             gen_update_cc_op(s);
7546             gen_jmp_im(s, pc_start - s->cs_base);
7547             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7548             break;
7549 
7550         case 0xdc: /* STGI */
7551             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7552                 || !PE(s)) {
7553                 goto illegal_op;
7554             }
7555             if (!check_cpl0(s)) {
7556                 break;
7557             }
7558             gen_update_cc_op(s);
7559             gen_helper_stgi(cpu_env);
7560             gen_jmp_im(s, s->pc - s->cs_base);
7561             gen_eob(s);
7562             break;
7563 
7564         case 0xdd: /* CLGI */
7565             if (!SVME(s) || !PE(s)) {
7566                 goto illegal_op;
7567             }
7568             if (!check_cpl0(s)) {
7569                 break;
7570             }
7571             gen_update_cc_op(s);
7572             gen_jmp_im(s, pc_start - s->cs_base);
7573             gen_helper_clgi(cpu_env);
7574             break;
7575 
7576         case 0xde: /* SKINIT */
7577             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7578                 || !PE(s)) {
7579                 goto illegal_op;
7580             }
7581             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7582             /* If not intercepted, not implemented -- raise #UD. */
7583             goto illegal_op;
7584 
7585         case 0xdf: /* INVLPGA */
7586             if (!SVME(s) || !PE(s)) {
7587                 goto illegal_op;
7588             }
7589             if (!check_cpl0(s)) {
7590                 break;
7591             }
7592             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7593             if (s->aflag == MO_64) {
7594                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7595             } else {
7596                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7597             }
7598             gen_helper_flush_page(cpu_env, s->A0);
7599             gen_jmp_im(s, s->pc - s->cs_base);
7600             gen_eob(s);
7601             break;
7602 
7603         CASE_MODRM_MEM_OP(2): /* lgdt */
7604             if (!check_cpl0(s)) {
7605                 break;
7606             }
7607             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7608             gen_lea_modrm(env, s, modrm);
7609             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7610             gen_add_A0_im(s, 2);
7611             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7612             if (dflag == MO_16) {
7613                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7614             }
7615             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7616             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7617             break;
7618 
7619         CASE_MODRM_MEM_OP(3): /* lidt */
7620             if (!check_cpl0(s)) {
7621                 break;
7622             }
7623             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7624             gen_lea_modrm(env, s, modrm);
7625             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7626             gen_add_A0_im(s, 2);
7627             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7628             if (dflag == MO_16) {
7629                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7630             }
7631             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7632             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7633             break;
7634 
7635         CASE_MODRM_OP(4): /* smsw */
7636             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7637             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7638             /*
7639              * In 32-bit mode, the higher 16 bits of the destination
7640              * register are undefined.  In practice CR0[31:0] is stored
7641              * just like in 64-bit mode.
7642              */
7643             mod = (modrm >> 6) & 3;
7644             ot = (mod != 3 ? MO_16 : s->dflag);
7645             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7646             break;
7647         case 0xee: /* rdpkru */
7648             if (prefixes & PREFIX_LOCK) {
7649                 goto illegal_op;
7650             }
7651             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7652             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7653             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7654             break;
7655         case 0xef: /* wrpkru */
7656             if (prefixes & PREFIX_LOCK) {
7657                 goto illegal_op;
7658             }
7659             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7660                                   cpu_regs[R_EDX]);
7661             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7662             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7663             break;
7664 
7665         CASE_MODRM_OP(6): /* lmsw */
7666             if (!check_cpl0(s)) {
7667                 break;
7668             }
7669             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7670             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7671             /*
7672              * Only the 4 lower bits of CR0 are modified.
7673              * PE cannot be set to zero if already set to one.
7674              */
7675             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7676             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7677             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7678             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7679             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7680             gen_jmp_im(s, s->pc - s->cs_base);
7681             gen_eob(s);
7682             break;
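        /*
         * Worked example of the masking above: the value written is
         * (old_cr0 & ~0xe) | (src & 0xf), so MP/EM/TS (bits 1..3) come
         * from the source while bit 0 of the old CR0 is preserved.
         * PE can therefore be set by LMSW but never cleared.
         */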
7683 
7684         CASE_MODRM_MEM_OP(7): /* invlpg */
7685             if (!check_cpl0(s)) {
7686                 break;
7687             }
7688             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7689             gen_lea_modrm(env, s, modrm);
7690             gen_helper_flush_page(cpu_env, s->A0);
7691             gen_jmp_im(s, s->pc - s->cs_base);
7692             gen_eob(s);
7693             break;
7694 
7695         case 0xf8: /* swapgs */
7696 #ifdef TARGET_X86_64
7697             if (CODE64(s)) {
7698                 if (check_cpl0(s)) {
7699                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7700                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7701                                   offsetof(CPUX86State, kernelgsbase));
7702                     tcg_gen_st_tl(s->T0, cpu_env,
7703                                   offsetof(CPUX86State, kernelgsbase));
7704                 }
7705                 break;
7706             }
7707 #endif
7708             goto illegal_op;
7709 
7710         case 0xf9: /* rdtscp */
7711             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7712                 goto illegal_op;
7713             }
7714             gen_update_cc_op(s);
7715             gen_jmp_im(s, pc_start - s->cs_base);
7716             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7717                 gen_io_start();
7718             }
7719             gen_helper_rdtscp(cpu_env);
7720             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7721                 gen_jmp(s, s->pc - s->cs_base);
7722             }
7723             break;
7724 
7725         default:
7726             goto unknown_op;
7727         }
7728         break;
7729 
7730     case 0x108: /* invd */
7731     case 0x109: /* wbinvd */
7732         if (check_cpl0(s)) {
7733             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7734             /* nothing to do */
7735         }
7736         break;
7737     case 0x63: /* arpl or movsxd (x86_64) */
7738 #ifdef TARGET_X86_64
7739         if (CODE64(s)) {
7740             int d_ot;
7741             /* d_ot is the size of destination */
7742             d_ot = dflag;
7743 
7744             modrm = x86_ldub_code(env, s);
7745             reg = ((modrm >> 3) & 7) | REX_R(s);
7746             mod = (modrm >> 6) & 3;
7747             rm = (modrm & 7) | REX_B(s);
7748 
7749             if (mod == 3) {
7750                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7751                 /* sign extend */
7752                 if (d_ot == MO_64) {
7753                     tcg_gen_ext32s_tl(s->T0, s->T0);
7754                 }
7755                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7756             } else {
7757                 gen_lea_modrm(env, s, modrm);
7758                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7759                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7760             }
7761         } else
7762 #endif
7763         {
7764             TCGLabel *label1;
7765             TCGv t0, t1, t2, a0;
7766 
7767             if (!PE(s) || VM86(s))
7768                 goto illegal_op;
7769             t0 = tcg_temp_local_new();
7770             t1 = tcg_temp_local_new();
7771             t2 = tcg_temp_local_new();
7772             ot = MO_16;
7773             modrm = x86_ldub_code(env, s);
7774             reg = (modrm >> 3) & 7;
7775             mod = (modrm >> 6) & 3;
7776             rm = modrm & 7;
7777             if (mod != 3) {
7778                 gen_lea_modrm(env, s, modrm);
7779                 gen_op_ld_v(s, ot, t0, s->A0);
7780                 a0 = tcg_temp_local_new();
7781                 tcg_gen_mov_tl(a0, s->A0);
7782             } else {
7783                 gen_op_mov_v_reg(s, ot, t0, rm);
7784                 a0 = NULL;
7785             }
7786             gen_op_mov_v_reg(s, ot, t1, reg);
7787             tcg_gen_andi_tl(s->tmp0, t0, 3);
7788             tcg_gen_andi_tl(t1, t1, 3);
7789             tcg_gen_movi_tl(t2, 0);
7790             label1 = gen_new_label();
7791             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7792             tcg_gen_andi_tl(t0, t0, ~3);
7793             tcg_gen_or_tl(t0, t0, t1);
7794             tcg_gen_movi_tl(t2, CC_Z);
7795             gen_set_label(label1);
7796             if (mod != 3) {
7797                 gen_op_st_v(s, ot, t0, a0);
7798                 tcg_temp_free(a0);
7799             } else {
7800                 gen_op_mov_reg_v(s, ot, rm, t0);
7801             }
7802             gen_compute_eflags(s);
7803             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7804             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7805             tcg_temp_free(t0);
7806             tcg_temp_free(t1);
7807             tcg_temp_free(t2);
7808         }
7809         break;
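    /*
     * The sequence above implements ARPL: if the destination selector's
     * RPL (its low two bits) is below the source's, it is raised to
     * match and ZF is set, otherwise ZF is cleared.  t2 carries the new
     * CC_Z bit, merged into cpu_cc_src after gen_compute_eflags().
     */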
7810     case 0x102: /* lar */
7811     case 0x103: /* lsl */
7812         {
7813             TCGLabel *label1;
7814             TCGv t0;
7815             if (!PE(s) || VM86(s))
7816                 goto illegal_op;
7817             ot = dflag != MO_16 ? MO_32 : MO_16;
7818             modrm = x86_ldub_code(env, s);
7819             reg = ((modrm >> 3) & 7) | REX_R(s);
7820             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7821             t0 = tcg_temp_local_new();
7822             gen_update_cc_op(s);
7823             if (b == 0x102) {
7824                 gen_helper_lar(t0, cpu_env, s->T0);
7825             } else {
7826                 gen_helper_lsl(t0, cpu_env, s->T0);
7827             }
7828             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7829             label1 = gen_new_label();
7830             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7831             gen_op_mov_reg_v(s, ot, reg, t0);
7832             gen_set_label(label1);
7833             set_cc_op(s, CC_OP_EFLAGS);
7834             tcg_temp_free(t0);
7835         }
7836         break;
7837     case 0x118:
7838         modrm = x86_ldub_code(env, s);
7839         mod = (modrm >> 6) & 3;
7840         op = (modrm >> 3) & 7;
7841         switch(op) {
7842         case 0: /* prefetchnta */
7843         case 1: /* prefetcht0 */
7844         case 2: /* prefetcht1 */
7845         case 3: /* prefetcht2 */
7846             if (mod == 3)
7847                 goto illegal_op;
7848             gen_nop_modrm(env, s, modrm);
7849             /* nothing more to do */
7850             break;
7851         default: /* nop (multi byte) */
7852             gen_nop_modrm(env, s, modrm);
7853             break;
7854         }
7855         break;
7856     case 0x11a:
7857         modrm = x86_ldub_code(env, s);
7858         if (s->flags & HF_MPX_EN_MASK) {
7859             mod = (modrm >> 6) & 3;
7860             reg = ((modrm >> 3) & 7) | REX_R(s);
7861             if (prefixes & PREFIX_REPZ) {
7862                 /* bndcl */
7863                 if (reg >= 4
7864                     || (prefixes & PREFIX_LOCK)
7865                     || s->aflag == MO_16) {
7866                     goto illegal_op;
7867                 }
7868                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7869             } else if (prefixes & PREFIX_REPNZ) {
7870                 /* bndcu */
7871                 if (reg >= 4
7872                     || (prefixes & PREFIX_LOCK)
7873                     || s->aflag == MO_16) {
7874                     goto illegal_op;
7875                 }
7876                 TCGv_i64 notu = tcg_temp_new_i64();
7877                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7878                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7879                 tcg_temp_free_i64(notu);
7880             } else if (prefixes & PREFIX_DATA) {
7881                 /* bndmov -- from reg/mem */
7882                 if (reg >= 4 || s->aflag == MO_16) {
7883                     goto illegal_op;
7884                 }
7885                 if (mod == 3) {
7886                     int reg2 = (modrm & 7) | REX_B(s);
7887                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7888                         goto illegal_op;
7889                     }
7890                     if (s->flags & HF_MPX_IU_MASK) {
7891                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7892                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7893                     }
7894                 } else {
7895                     gen_lea_modrm(env, s, modrm);
7896                     if (CODE64(s)) {
7897                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7898                                             s->mem_index, MO_LEQ);
7899                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7900                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7901                                             s->mem_index, MO_LEQ);
7902                     } else {
7903                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7904                                             s->mem_index, MO_LEUL);
7905                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7906                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7907                                             s->mem_index, MO_LEUL);
7908                     }
7909                     /* bnd registers are now in-use */
7910                     gen_set_hflag(s, HF_MPX_IU_MASK);
7911                 }
7912             } else if (mod != 3) {
7913                 /* bndldx */
7914                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7915                 if (reg >= 4
7916                     || (prefixes & PREFIX_LOCK)
7917                     || s->aflag == MO_16
7918                     || a.base < -1) {
7919                     goto illegal_op;
7920                 }
7921                 if (a.base >= 0) {
7922                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7923                 } else {
7924                     tcg_gen_movi_tl(s->A0, 0);
7925                 }
7926                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7927                 if (a.index >= 0) {
7928                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7929                 } else {
7930                     tcg_gen_movi_tl(s->T0, 0);
7931                 }
7932                 if (CODE64(s)) {
7933                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7934                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7935                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7936                 } else {
7937                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7938                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7939                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7940                 }
7941                 gen_set_hflag(s, HF_MPX_IU_MASK);
7942             }
7943         }
7944         gen_nop_modrm(env, s, modrm);
7945         break;
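    /*
     * Opcode 0F 1A dispatches the MPX "load" forms on the prefix:
     * F3 (REPZ) selects bndcl, F2 (REPNZ) bndcu, 66 (DATA) the load
     * direction of bndmov, and no prefix with a memory operand bndldx.
     * When MPX is not enabled in the hflags, the whole encoding is
     * treated as a multi-byte NOP.
     */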
7946     case 0x11b:
7947         modrm = x86_ldub_code(env, s);
7948         if (s->flags & HF_MPX_EN_MASK) {
7949             mod = (modrm >> 6) & 3;
7950             reg = ((modrm >> 3) & 7) | REX_R(s);
7951             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7952                 /* bndmk */
7953                 if (reg >= 4
7954                     || (prefixes & PREFIX_LOCK)
7955                     || s->aflag == MO_16) {
7956                     goto illegal_op;
7957                 }
7958                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7959                 if (a.base >= 0) {
7960                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7961                     if (!CODE64(s)) {
7962                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7963                     }
7964                 } else if (a.base == -1) {
7965                     /* with no base register, the lower bound is 0 */
7966                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7967                 } else {
7968                     /* rip-relative generates #ud */
7969                     goto illegal_op;
7970                 }
7971                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7972                 if (!CODE64(s)) {
7973                     tcg_gen_ext32u_tl(s->A0, s->A0);
7974                 }
7975                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7976                 /* bnd registers are now in-use */
7977                 gen_set_hflag(s, HF_MPX_IU_MASK);
7978                 break;
7979             } else if (prefixes & PREFIX_REPNZ) {
7980                 /* bndcn */
7981                 if (reg >= 4
7982                     || (prefixes & PREFIX_LOCK)
7983                     || s->aflag == MO_16) {
7984                     goto illegal_op;
7985                 }
7986                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7987             } else if (prefixes & PREFIX_DATA) {
7988                 /* bndmov -- to reg/mem */
7989                 if (reg >= 4 || s->aflag == MO_16) {
7990                     goto illegal_op;
7991                 }
7992                 if (mod == 3) {
7993                     int reg2 = (modrm & 7) | REX_B(s);
7994                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7995                         goto illegal_op;
7996                     }
7997                     if (s->flags & HF_MPX_IU_MASK) {
7998                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7999                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8000                     }
8001                 } else {
8002                     gen_lea_modrm(env, s, modrm);
8003                     if (CODE64(s)) {
8004                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8005                                             s->mem_index, MO_LEQ);
8006                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8007                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8008                                             s->mem_index, MO_LEQ);
8009                     } else {
8010                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8011                                             s->mem_index, MO_LEUL);
8012                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8013                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8014                                             s->mem_index, MO_LEUL);
8015                     }
8016                 }
8017             } else if (mod != 3) {
8018                 /* bndstx */
8019                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8020                 if (reg >= 4
8021                     || (prefixes & PREFIX_LOCK)
8022                     || s->aflag == MO_16
8023                     || a.base < -1) {
8024                     goto illegal_op;
8025                 }
8026                 if (a.base >= 0) {
8027                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8028                 } else {
8029                     tcg_gen_movi_tl(s->A0, 0);
8030                 }
8031                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8032                 if (a.index >= 0) {
8033                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8034                 } else {
8035                     tcg_gen_movi_tl(s->T0, 0);
8036                 }
8037                 if (CODE64(s)) {
8038                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8039                                         cpu_bndl[reg], cpu_bndu[reg]);
8040                 } else {
8041                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8042                                         cpu_bndl[reg], cpu_bndu[reg]);
8043                 }
8044             }
8045         }
8046         gen_nop_modrm(env, s, modrm);
8047         break;
8048     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8049         modrm = x86_ldub_code(env, s);
8050         gen_nop_modrm(env, s, modrm);
8051         break;
8052 
8053     case 0x120: /* mov reg, crN */
8054     case 0x122: /* mov crN, reg */
8055         if (!check_cpl0(s)) {
8056             break;
8057         }
8058         modrm = x86_ldub_code(env, s);
8059         /*
8060          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8061          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8062          * processors all show that the mod bits are assumed to be 1's,
8063          * regardless of actual values.
8064          */
8065         rm = (modrm & 7) | REX_B(s);
8066         reg = ((modrm >> 3) & 7) | REX_R(s);
8067         switch (reg) {
8068         case 0:
8069             if ((prefixes & PREFIX_LOCK) &&
8070                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8071                 reg = 8;
8072             }
8073             break;
8074         case 2:
8075         case 3:
8076         case 4:
8077         case 8:
8078             break;
8079         default:
8080             goto unknown_op;
8081         }
8082         ot  = (CODE64(s) ? MO_64 : MO_32);
8083 
8084         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8085             gen_io_start();
8086         }
8087         if (b & 2) {
8088             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8089             gen_op_mov_v_reg(s, ot, s->T0, rm);
8090             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8091             gen_jmp_im(s, s->pc - s->cs_base);
8092             gen_eob(s);
8093         } else {
8094             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8095             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8096             gen_op_mov_reg_v(s, ot, rm, s->T0);
8097             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8098                 gen_jmp(s, s->pc - s->cs_base);
8099             }
8100         }
8101         break;
8102 
8103     case 0x121: /* mov reg, drN */
8104     case 0x123: /* mov drN, reg */
8105         if (check_cpl0(s)) {
8106             modrm = x86_ldub_code(env, s);
8107             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8108              * AMD documentation (24594.pdf) and testing of
8109              * Intel 386 and 486 processors all show that the mod bits
8110              * are assumed to be 1's, regardless of actual values.
8111              */
8112             rm = (modrm & 7) | REX_B(s);
8113             reg = ((modrm >> 3) & 7) | REX_R(s);
8114             if (CODE64(s))
8115                 ot = MO_64;
8116             else
8117                 ot = MO_32;
8118             if (reg >= 8) {
8119                 goto illegal_op;
8120             }
8121             if (b & 2) {
8122                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8123                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8124                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8125                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8126                 gen_jmp_im(s, s->pc - s->cs_base);
8127                 gen_eob(s);
8128             } else {
8129                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8130                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8131                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8132                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8133             }
8134         }
8135         break;
8136     case 0x106: /* clts */
8137         if (check_cpl0(s)) {
8138             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8139             gen_helper_clts(cpu_env);
8140             /* abort block because static cpu state changed */
8141             gen_jmp_im(s, s->pc - s->cs_base);
8142             gen_eob(s);
8143         }
8144         break;
8145     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8146     case 0x1c3: /* MOVNTI reg, mem */
8147         if (!(s->cpuid_features & CPUID_SSE2))
8148             goto illegal_op;
8149         ot = mo_64_32(dflag);
8150         modrm = x86_ldub_code(env, s);
8151         mod = (modrm >> 6) & 3;
8152         if (mod == 3)
8153             goto illegal_op;
8154         reg = ((modrm >> 3) & 7) | REX_R(s);
8155         /* generate a generic store */
8156         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8157         break;
8158     case 0x1ae:
8159         modrm = x86_ldub_code(env, s);
8160         switch (modrm) {
8161         CASE_MODRM_MEM_OP(0): /* fxsave */
8162             if (!(s->cpuid_features & CPUID_FXSR)
8163                 || (prefixes & PREFIX_LOCK)) {
8164                 goto illegal_op;
8165             }
8166             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8167                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8168                 break;
8169             }
8170             gen_lea_modrm(env, s, modrm);
8171             gen_helper_fxsave(cpu_env, s->A0);
8172             break;
8173 
8174         CASE_MODRM_MEM_OP(1): /* fxrstor */
8175             if (!(s->cpuid_features & CPUID_FXSR)
8176                 || (prefixes & PREFIX_LOCK)) {
8177                 goto illegal_op;
8178             }
8179             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8180                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8181                 break;
8182             }
8183             gen_lea_modrm(env, s, modrm);
8184             gen_helper_fxrstor(cpu_env, s->A0);
8185             break;
8186 
8187         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8188             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8189                 goto illegal_op;
8190             }
8191             if (s->flags & HF_TS_MASK) {
8192                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8193                 break;
8194             }
8195             gen_lea_modrm(env, s, modrm);
8196             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8197             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8198             break;
8199 
8200         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8201             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8202                 goto illegal_op;
8203             }
8204             if (s->flags & HF_TS_MASK) {
8205                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8206                 break;
8207             }
8208             gen_helper_update_mxcsr(cpu_env);
8209             gen_lea_modrm(env, s, modrm);
8210             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8211             gen_op_st_v(s, MO_32, s->T0, s->A0);
8212             break;
8213 
8214         CASE_MODRM_MEM_OP(4): /* xsave */
8215             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8216                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8217                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8218                 goto illegal_op;
8219             }
8220             gen_lea_modrm(env, s, modrm);
8221             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8222                                   cpu_regs[R_EDX]);
8223             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8224             break;
8225 
8226         CASE_MODRM_MEM_OP(5): /* xrstor */
8227             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8228                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8229                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8230                 goto illegal_op;
8231             }
8232             gen_lea_modrm(env, s, modrm);
8233             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8234                                   cpu_regs[R_EDX]);
8235             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8236             /* XRSTOR is how MPX is enabled, which changes how
8237                we translate.  Thus we need to end the TB.  */
8238             gen_update_cc_op(s);
8239             gen_jmp_im(s, s->pc - s->cs_base);
8240             gen_eob(s);
8241             break;
8242 
8243         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8244             if (prefixes & PREFIX_LOCK) {
8245                 goto illegal_op;
8246             }
8247             if (prefixes & PREFIX_DATA) {
8248                 /* clwb */
8249                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8250                     goto illegal_op;
8251                 }
8252                 gen_nop_modrm(env, s, modrm);
8253             } else {
8254                 /* xsaveopt */
8255                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8256                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8257                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8258                     goto illegal_op;
8259                 }
8260                 gen_lea_modrm(env, s, modrm);
8261                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8262                                       cpu_regs[R_EDX]);
8263                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8264             }
8265             break;
8266 
8267         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8268             if (prefixes & PREFIX_LOCK) {
8269                 goto illegal_op;
8270             }
8271             if (prefixes & PREFIX_DATA) {
8272                 /* clflushopt */
8273                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8274                     goto illegal_op;
8275                 }
8276             } else {
8277                 /* clflush */
8278                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8279                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8280                     goto illegal_op;
8281                 }
8282             }
8283             gen_nop_modrm(env, s, modrm);
8284             break;
8285 
8286         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8287         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8288         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8289         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8290             if (CODE64(s)
8291                 && (prefixes & PREFIX_REPZ)
8292                 && !(prefixes & PREFIX_LOCK)
8293                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8294                 TCGv base, treg, src, dst;
8295 
8296                 /* Preserve hflags bits by testing CR4 at runtime.  */
8297                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8298                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8299 
8300                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8301                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8302 
8303                 if (modrm & 0x10) {
8304                     /* wr*base */
8305                     dst = base, src = treg;
8306                 } else {
8307                     /* rd*base */
8308                     dst = treg, src = base;
8309                 }
8310 
8311                 if (s->dflag == MO_32) {
8312                     tcg_gen_ext32u_tl(dst, src);
8313                 } else {
8314                     tcg_gen_mov_tl(dst, src);
8315                 }
8316                 break;
8317             }
8318             goto unknown_op;
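        /*
         * Decoding sketch for the F3 0F AE /0../3 group above: modrm
         * bit 3 selects the segment (clear for FS, set for GS, hence
         * "modrm & 8") and bit 4 the direction (set for wr*base, clear
         * for rd*base, hence "modrm & 0x10").  CR4.FSGSBASE is tested
         * at run time so that it does not need to be encoded in the
         * hflags.
         */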
8319 
8320         case 0xf8: /* sfence / pcommit */
8321             if (prefixes & PREFIX_DATA) {
8322                 /* pcommit */
8323                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8324                     || (prefixes & PREFIX_LOCK)) {
8325                     goto illegal_op;
8326                 }
8327                 break;
8328             }
8329             /* fallthru */
8330         case 0xf9 ... 0xff: /* sfence */
8331             if (!(s->cpuid_features & CPUID_SSE)
8332                 || (prefixes & PREFIX_LOCK)) {
8333                 goto illegal_op;
8334             }
8335             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8336             break;
8337         case 0xe8 ... 0xef: /* lfence */
8338             if (!(s->cpuid_features & CPUID_SSE)
8339                 || (prefixes & PREFIX_LOCK)) {
8340                 goto illegal_op;
8341             }
8342             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8343             break;
8344         case 0xf0 ... 0xf7: /* mfence */
8345             if (!(s->cpuid_features & CPUID_SSE2)
8346                 || (prefixes & PREFIX_LOCK)) {
8347                 goto illegal_op;
8348             }
8349             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8350             break;
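        /*
         * The three fences map directly onto TCG memory barriers:
         * sfence orders stores against stores (TCG_MO_ST_ST), lfence
         * orders loads against loads (TCG_MO_LD_LD), and mfence is a
         * full barrier (TCG_MO_ALL), each with sequentially consistent
         * (TCG_BAR_SC) semantics.
         */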
8351 
8352         default:
8353             goto unknown_op;
8354         }
8355         break;
8356 
8357     case 0x10d: /* 3DNow! prefetch(w) */
8358         modrm = x86_ldub_code(env, s);
8359         mod = (modrm >> 6) & 3;
8360         if (mod == 3)
8361             goto illegal_op;
8362         gen_nop_modrm(env, s, modrm);
8363         break;
8364     case 0x1aa: /* rsm */
8365         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8366         if (!(s->flags & HF_SMM_MASK))
8367             goto illegal_op;
8368 #ifdef CONFIG_USER_ONLY
8369         /* we should not be in SMM mode */
8370         g_assert_not_reached();
8371 #else
8372         gen_update_cc_op(s);
8373         gen_jmp_im(s, s->pc - s->cs_base);
8374         gen_helper_rsm(cpu_env);
8375 #endif /* CONFIG_USER_ONLY */
8376         gen_eob(s);
8377         break;
8378     case 0x1b8: /* SSE4.2 popcnt */
8379         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8380              PREFIX_REPZ)
8381             goto illegal_op;
8382         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8383             goto illegal_op;
8384 
8385         modrm = x86_ldub_code(env, s);
8386         reg = ((modrm >> 3) & 7) | REX_R(s);
8387 
8388         if (s->prefix & PREFIX_DATA) {
8389             ot = MO_16;
8390         } else {
8391             ot = mo_64_32(dflag);
8392         }
8393 
8394         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8395         gen_extu(ot, s->T0);
8396         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8397         tcg_gen_ctpop_tl(s->T0, s->T0);
8398         gen_op_mov_reg_v(s, ot, reg, s->T0);
8399 
8400         set_cc_op(s, CC_OP_POPCNT);
8401         break;
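    /*
     * POPCNT computes its flags lazily: the zero-extended source is
     * stashed in cpu_cc_src, and CC_OP_POPCNT lets the flag-evaluation
     * code derive ZF from it on demand (ZF is set iff the source was
     * zero; the other arithmetic flags are cleared).
     */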
8402     case 0x10e ... 0x10f:
8403         /* 3DNow! instructions, ignore prefixes */
8404         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8405         /* fall through */
8406     case 0x110 ... 0x117:
8407     case 0x128 ... 0x12f:
8408     case 0x138 ... 0x13a:
8409     case 0x150 ... 0x179:
8410     case 0x17c ... 0x17f:
8411     case 0x1c2:
8412     case 0x1c4 ... 0x1c6:
8413     case 0x1d0 ... 0x1fe:
8414         gen_sse(env, s, b, pc_start);
8415         break;
8416     default:
8417         goto unknown_op;
8418     }
8419     return s->pc;
8420  illegal_op:
8421     gen_illegal_opcode(s);
8422     return s->pc;
8423  unknown_op:
8424     gen_unknown_opcode(env, s);
8425     return s->pc;
8426 }
8427 
8428 void tcg_x86_init(void)
8429 {
8430     static const char reg_names[CPU_NB_REGS][4] = {
8431 #ifdef TARGET_X86_64
8432         [R_EAX] = "rax",
8433         [R_EBX] = "rbx",
8434         [R_ECX] = "rcx",
8435         [R_EDX] = "rdx",
8436         [R_ESI] = "rsi",
8437         [R_EDI] = "rdi",
8438         [R_EBP] = "rbp",
8439         [R_ESP] = "rsp",
8440         [8]  = "r8",
8441         [9]  = "r9",
8442         [10] = "r10",
8443         [11] = "r11",
8444         [12] = "r12",
8445         [13] = "r13",
8446         [14] = "r14",
8447         [15] = "r15",
8448 #else
8449         [R_EAX] = "eax",
8450         [R_EBX] = "ebx",
8451         [R_ECX] = "ecx",
8452         [R_EDX] = "edx",
8453         [R_ESI] = "esi",
8454         [R_EDI] = "edi",
8455         [R_EBP] = "ebp",
8456         [R_ESP] = "esp",
8457 #endif
8458     };
8459     static const char seg_base_names[6][8] = {
8460         [R_CS] = "cs_base",
8461         [R_DS] = "ds_base",
8462         [R_ES] = "es_base",
8463         [R_FS] = "fs_base",
8464         [R_GS] = "gs_base",
8465         [R_SS] = "ss_base",
8466     };
8467     static const char bnd_regl_names[4][8] = {
8468         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8469     };
8470     static const char bnd_regu_names[4][8] = {
8471         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8472     };
8473     int i;
8474 
8475     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8476                                        offsetof(CPUX86State, cc_op), "cc_op");
8477     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8478                                     "cc_dst");
8479     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8480                                     "cc_src");
8481     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8482                                      "cc_src2");
8483 
8484     for (i = 0; i < CPU_NB_REGS; ++i) {
8485         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8486                                          offsetof(CPUX86State, regs[i]),
8487                                          reg_names[i]);
8488     }
8489 
8490     for (i = 0; i < 6; ++i) {
8491         cpu_seg_base[i]
8492             = tcg_global_mem_new(cpu_env,
8493                                  offsetof(CPUX86State, segs[i].base),
8494                                  seg_base_names[i]);
8495     }
8496 
8497     for (i = 0; i < 4; ++i) {
8498         cpu_bndl[i]
8499             = tcg_global_mem_new_i64(cpu_env,
8500                                      offsetof(CPUX86State, bnd_regs[i].lb),
8501                                      bnd_regl_names[i]);
8502         cpu_bndu[i]
8503             = tcg_global_mem_new_i64(cpu_env,
8504                                      offsetof(CPUX86State, bnd_regs[i].ub),
8505                                      bnd_regu_names[i]);
8506     }
8507 }
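
/*
 * Everything registered above follows one pattern: tcg_global_mem_new*
 * binds a named TCG global to a field of CPUX86State so that generated
 * code can access it directly.  A minimal sketch of how one more global
 * would be added (the "pkru" field is purely illustrative here, not
 * something this function registers):
 *
 *     static TCGv_i32 cpu_pkru;
 *     ...
 *     cpu_pkru = tcg_global_mem_new_i32(cpu_env,
 *                                       offsetof(CPUX86State, pkru),
 *                                       "pkru");
 */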
8508 
8509 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8510 {
8511     DisasContext *dc = container_of(dcbase, DisasContext, base);
8512     CPUX86State *env = cpu->env_ptr;
8513     uint32_t flags = dc->base.tb->flags;
8514     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8515     int iopl = (flags >> IOPL_SHIFT) & 3;
8516 
8517     dc->cs_base = dc->base.tb->cs_base;
8518     dc->flags = flags;
8519 #ifndef CONFIG_USER_ONLY
8520     dc->cpl = cpl;
8521     dc->iopl = iopl;
8522 #endif
8523 
8524     /* We make some simplifying assumptions; validate they're correct. */
8525     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8526     g_assert(CPL(dc) == cpl);
8527     g_assert(IOPL(dc) == iopl);
8528     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8529     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8530     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8531     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8532     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8533     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8534     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8535     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8536 
8537     dc->cc_op = CC_OP_DYNAMIC;
8538     dc->cc_op_dirty = false;
8539     dc->popl_esp_hack = 0;
8540     /* select memory access functions */
8541     dc->mem_index = 0;
8542 #ifdef CONFIG_SOFTMMU
8543     dc->mem_index = cpu_mmu_index(env, false);
8544 #endif
8545     dc->cpuid_features = env->features[FEAT_1_EDX];
8546     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8547     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8548     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8549     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8550     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8551     dc->jmp_opt = !(dc->base.singlestep_enabled ||
8552                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8553     /*
8554      * If jmp_opt, we want to handle each string instruction individually.
8555      * For icount also disable repz optimization so that each iteration
8556      * is accounted separately.
8557      */
8558     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8559 
8560     dc->T0 = tcg_temp_new();
8561     dc->T1 = tcg_temp_new();
8562     dc->A0 = tcg_temp_new();
8563 
8564     dc->tmp0 = tcg_temp_new();
8565     dc->tmp1_i64 = tcg_temp_new_i64();
8566     dc->tmp2_i32 = tcg_temp_new_i32();
8567     dc->tmp3_i32 = tcg_temp_new_i32();
8568     dc->tmp4 = tcg_temp_new();
8569     dc->ptr0 = tcg_temp_new_ptr();
8570     dc->ptr1 = tcg_temp_new_ptr();
8571     dc->cc_srcT = tcg_temp_local_new();
8572 }
8573 
8574 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8575 {
8576 }
8577 
8578 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8579 {
8580     DisasContext *dc = container_of(dcbase, DisasContext, base);
8581 
8582     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8583 }
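
/*
 * The two values recorded per insn here, pc_next and cc_op, are exactly
 * what restore_state_to_opc() consumes as data[0] and data[1] when
 * unwinding from an exception back to a precise guest state.
 */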
8584 
8585 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8586                                      const CPUBreakpoint *bp)
8587 {
8588     DisasContext *dc = container_of(dcbase, DisasContext, base);
8589     /* If RF is set, suppress an internally generated breakpoint.  */
8590     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8591     if (bp->flags & flags) {
8592         gen_debug(dc);
8593         /* The address covered by the breakpoint must be included in
8594            [tb->pc, tb->pc + tb->size) in order for it to be
8595            properly cleared -- thus we increment the PC here so that
8596            the generic logic setting tb->size later does the right thing.  */
8597         dc->base.pc_next += 1;
8598         return true;
8599     } else {
8600         return false;
8601     }
8602 }
8603 
8604 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8605 {
8606     DisasContext *dc = container_of(dcbase, DisasContext, base);
8607     target_ulong pc_next;
8608 
8609 #ifdef TARGET_VSYSCALL_PAGE
8610     /*
8611      * Detect entry into the vsyscall page and invoke the syscall.
8612      */
8613     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8614         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8615         dc->base.pc_next = dc->pc + 1;
8616         return;
8617     }
8618 #endif
8619 
8620     pc_next = disas_insn(dc, cpu);
8621 
8622     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8623         /* In single-step mode, we generate only one instruction and
8624            then raise an exception. */
8625         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8626            the flag and abort the translation to give the IRQs a
8627            chance to happen. */
8628         dc->base.is_jmp = DISAS_TOO_MANY;
8629     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8630                && ((pc_next & TARGET_PAGE_MASK)
8631                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8632                        & TARGET_PAGE_MASK)
8633                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8634         /* Do not cross a page boundary in icount mode, since doing so
8635            can raise an exception. Stop only when the boundary would be
8636            crossed by the first instruction in the block; if the current
8637            instruction already crossed it, that is fine, because no
8638            exception has stopped this code from running.
8639          */
8640         dc->base.is_jmp = DISAS_TOO_MANY;
8641     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8642         dc->base.is_jmp = DISAS_TOO_MANY;
8643     }
8644 
8645     dc->base.pc_next = pc_next;
8646 }
8647 
8648 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8649 {
8650     DisasContext *dc = container_of(dcbase, DisasContext, base);
8651 
8652     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8653         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8654         gen_eob(dc);
8655     }
8656 }
8657 
8658 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8659                               CPUState *cpu)
8660 {
8661     DisasContext *dc = container_of(dcbase, DisasContext, base);
8662 
8663     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8664     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8665 }
8666 
8667 static const TranslatorOps i386_tr_ops = {
8668     .init_disas_context = i386_tr_init_disas_context,
8669     .tb_start           = i386_tr_tb_start,
8670     .insn_start         = i386_tr_insn_start,
8671     .breakpoint_check   = i386_tr_breakpoint_check,
8672     .translate_insn     = i386_tr_translate_insn,
8673     .tb_stop            = i386_tr_tb_stop,
8674     .disas_log          = i386_tr_disas_log,
8675 };
8676 
8677 /* generate intermediate code for basic block 'tb'.  */
8678 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8679 {
8680     DisasContext dc;
8681 
8682     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8683 }
8684 
8685 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8686                           target_ulong *data)
8687 {
8688     int cc_op = data[1];
8689     env->eip = data[0] - tb->cs_base;
8690     if (cc_op != CC_OP_DYNAMIC) {
8691         env->cc_op = cc_op;
8692     }
8693 }
8694