xref: /openbmc/qemu/target/i386/tcg/decode-new.c.inc (revision b9c0a2e0)
1/*
2 * New-style decoder for i386 instructions
3 *
4 *  Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM.  As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
26 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
27 * though.
28 *
29 * Operand sizes
30 * -------------
31 *
32 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
33 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
34 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
35 *
36 * The manual lists immediate far destinations as Ap (technically an implicit
37 * argument).  The decoder splits them into two immediates, using "Ip" for
38 * the offset part (that comes first in the instruction stream) and "Iw" for
39 * the segment/selector part.  The size of the offset is given by s->dflag
40 * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
41 * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
42 *
43 * Operand types
44 * -------------
45 *
 * For memory-only operands, if the emitter function wants to rely on
47 * generic load and writeback, the decoder needs to know the type of the
48 * operand.  Therefore, M is often replaced by the more specific EM and WM
49 * (respectively selecting an ALU operand, like the operand type E, or a
50 * vector operand like the operand type W).
51 *
52 * Immediates are almost always signed or masked away in helpers.  Two
53 * common exceptions are IN/OUT and absolute jumps.  For these, there is
54 * an additional custom operand type "I_unsigned".  Alternatively, the
55 * mask could be applied (and the original sign-extended value would be
56 * optimized away by TCG) in the emitter function.
57 *
58 * Finally, a "nop" operand type is used for multi-byte NOPs.  It accepts
59 * any value of mod including 11b (unlike M) but it does not try to
60 * interpret the operand (like M).
61 *
62 * Vector operands
63 * ---------------
64 *
65 * The main difference is that the V, U and W types are extended to
66 * cover MMX as well; if an instruction is like
67 *
68 *      por   Pq, Qq
69 *  66  por   Vx, Hx, Wx
70 *
71 * only the second row is included and the instruction is marked as a
72 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
73 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
74 * "x" to "q" if there is no prefix.
75 *
76 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
77 * if the difference is expressed via prefixes.  Individual instructions
78 * are separated by prefix in the generator functions.
79 *
80 * There is a custom size "xh" used to address half of a SSE/AVX operand.
81 * This points to a 64-bit operand for SSE operations, 128-bit operand
82 * for 256-bit AVX operands, etc.  It is used for conversion operations
83 * such as VCVTPH2PS or VCVTSS2SD.
84 *
 * There are a couple of cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
88 *
89 * VEX exception classes
90 * ---------------------
91 *
 * Speaking of imprecisions in the manual, the decoder treats all
93 * exception-class 4 instructions as having an optional VEX prefix, and
94 * all exception-class 6 instructions as having a mandatory VEX prefix.
95 * This is true except for a dozen instructions; these are in exception
96 * class 4 but do not ignore the VEX.W bit (which does not even exist
97 * without a VEX prefix).  These instructions are mostly listed in Intel's
98 * table 2-16, but with a few exceptions.
99 *
100 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
101 * they list the VEX.W requirements in the exception classes as well (except
102 * when they don't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still uses 4 as the
104 * primary exception class... except when they don't.
105 *
106 * The summary is:
107 *                       Intel     AMD         VEX.W           note
108 * -------------------------------------------------------------------
109 * vpblendd              4         4J          0
110 * vpblendvb             4         4E-X        0               (*)
111 * vpbroadcastq          6         6D          0               (+)
112 * vpermd/vpermps        4         4H          0               (§)
113 * vpermq/vpermpd        4         4H-1        1               (§)
114 * vpermilpd/vpermilps   4         6E          0               (^)
115 * vpmaskmovd            6         4K          significant     (^)
116 * vpsllv                4         4K          significant
117 * vpsrav                4         4J          0
118 * vpsrlv                4         4K          significant
119 * vtestps/vtestpd       4         4G          0
120 *
121 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
122 *         explain why it is considered exception class 4.  However,
123 *         Intel says that VEX-only instructions should be in class 6...
124 *
125 *    (+)  Not found in Intel's table 2-16
126 *
127 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
128 *         however present in the description of the instruction
129 *
130 *    (^)  these are the two cases in which Intel and AMD disagree on the
131 *         primary exception class
132 */
133
/*
 * Zero initializer for a table slot.  Note that the expansion carries its
 * own trailing comma.
 */
#define X86_OP_NONE { 0 },
135
/*
 * Build a table-entry initializer whose .decode member is decode_<op> and
 * whose .is_decode flag is set.  The operand type and size arguments are
 * pasted onto the X86_TYPE_ and X86_SIZE_ prefixes.  Any trailing arguments
 * are appended verbatim after the fixed members, so they can supply further
 * designated initializers (cpuid(), vexN, chk(), p_xx, ...).
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}
147
/*
 * Shorthands for X86_OP_GROUP3:
 *   GROUP1  - op0 is given; op1 is "2op" with op0's size; op2 is None
 *   GROUP2  - op0 is given; op1 is "2op" with op0's size; the second
 *             operand passed to the macro lands in op2
 *   GROUPw  - only op0 is given
 *   GROUPwr - op0 and op1 are given; op2 is None
 *   GROUP0  - all three operands are None
 */
#define X86_OP_GROUP1(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_GROUPwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_GROUP3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
158
/*
 * Build a table-entry initializer whose .gen member is gen_<op>.  The
 * operand type and size arguments are pasted onto the X86_TYPE_ and
 * X86_SIZE_ prefixes.  Any trailing arguments are appended verbatim after
 * the fixed members, so they can supply further designated initializers.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}
169
/*
 * Like X86_OP_ENTRY3, with a fourth operand fixed to type I and size b.
 * The .op3/.s3 initializers are placed before the caller's trailing
 * arguments.
 */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)
174
/*
 * Short forms that are mostly useful for ALU opcodes and other
 * one-byte opcodes.  For vector instructions it is usually
 * clearer to write all three operands explicitly, because the
 * corresponding gen_* function will use OP_PTRn rather than s->T0
 * and s->T1.
 *
 * How each form maps its arguments onto X86_OP_ENTRY3's op0/op1/op2:
 *   ENTRYrr - op0 is None; the two given operands go to op1 and op2
 *   ENTRYwr - the two given operands go to op0 and op1; op2 is None
 *   ENTRY2  - first operand goes to op0; op1 is "2op" with the same size;
 *             second operand goes to op2
 *   ENTRYw  - the given operand goes to op0; op1 and op2 are None
 *   ENTRYr  - the given operand goes to op1; op0 and op2 are None
 *   ENTRY1  - the given operand goes to op0; op1 is "2op" with the same
 *             size; op2 is None
 *   ENTRY0  - all three operands are None
 */
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, op0, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY1(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
196
/*
 * Optional trailing arguments for the X86_OP_* macros.  Each one expands to
 * a designated initializer followed by a comma, so several can be written
 * one after another without separators, e.g. "cpuid(SSE2) p_00".
 *
 * cpuid() sets .cpuid to an X86_FEAT_* value.  All the other macros in this
 * group assign the same member, .special; if two of them are used in one
 * entry, C keeps only the last initializer for that member.
 */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
#define nolea .special = X86_SPECIAL_NoLoadEA,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define op0_Mw .special = X86_SPECIAL_Op0_Mw,
208
/*
 * vexN sets .vex_class to N.  The _rep3, _rep5 and _unal variants also set
 * .vex_special (to X86_VEX_REPScalar or X86_VEX_SSEUnaligned).
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,
224
/*
 * chk() sets .check to a single X86_CHECK_* flag; chk2() and chk3() OR two
 * or three flags together into one initializer for the same member.
 */
#define chk(a) .check = X86_CHECK_##a,
#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b,
#define chk3(a, b, c) .check = X86_CHECK_##a | X86_CHECK_##b | X86_CHECK_##c,
/* svm() stores an SVM_EXIT_* code in .intercept and sets .has_intercept. */
#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true,
229
/*
 * Sets .vex_special to X86_VEX_AVX2_256.  This is the same member that
 * vex1_rep3, vex2_rep3, vex4_rep5 and vex4_unal write.
 */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
231
/*
 * Single-bit masks used in the .valid_prefix member.  P_00 is bit 0; the
 * others are 1 shifted left by the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * Initializers for .valid_prefix; each name lists the P_* masks that are
 * ORed together.
 */
#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
247
/* An X86OpEntry compound literal with an empty initializer list. */
#define UNKNOWN_OPCODE ((X86OpEntry) {})
249
250static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
251{
252    if (!s->has_modrm) {
253        s->modrm = x86_ldub_code(env, s);
254        s->has_modrm = true;
255    }
256    return s->modrm;
257}
258
259static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
260{
261    if (s->prefix & PREFIX_REPNZ) {
262        return &entries[3];
263    } else if (s->prefix & PREFIX_REPZ) {
264        return &entries[2];
265    } else if (s->prefix & PREFIX_DATA) {
266        return &entries[1];
267    } else {
268        return &entries[0];
269    }
270}
271
272static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
273{
274    static const X86OpEntry group15_reg[8] = {
275        [0] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
276        [1] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
277        [2] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
278        [3] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
279        [5] = X86_OP_ENTRY0(LFENCE,          cpuid(SSE2) p_00),
280        [6] = X86_OP_ENTRY0(MFENCE,          cpuid(SSE2) p_00),
281        [7] = X86_OP_ENTRY0(SFENCE,          cpuid(SSE2) p_00),
282    };
283
284    static const X86OpEntry group15_mem[8] = {
285        [0] = X86_OP_ENTRYw(FXSAVE,     M,y, cpuid(FXSR) p_00),
286        [1] = X86_OP_ENTRYr(FXRSTOR,    M,y, cpuid(FXSR) p_00),
287        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128) p_00),
288        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128) p_00),
289        [4] = X86_OP_ENTRYw(XSAVE,      M,y, cpuid(XSAVE) p_00),
290        [5] = X86_OP_ENTRYr(XRSTOR,     M,y, cpuid(XSAVE) p_00),
291        [6] = X86_OP_ENTRYw(XSAVEOPT,   M,b, cpuid(XSAVEOPT) p_00),
292        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSH) p_00),
293    };
294
295    static const X86OpEntry group15_mem_66[8] = {
296        [6] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLWB)),
297        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSHOPT)),
298    };
299
300    uint8_t modrm = get_modrm(s, env);
301    int op = (modrm >> 3) & 7;
302
303    if ((modrm >> 6) == 3) {
304        *entry = group15_reg[op];
305    } else if (s->prefix & PREFIX_DATA) {
306        *entry = group15_mem_66[op];
307    } else {
308        *entry = group15_mem[op];
309    }
310}
311
312static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
313{
314    static const X86GenFunc group17_gen[8] = {
315        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
316    };
317    int op = (get_modrm(s, env) >> 3) & 7;
318    entry->gen = group17_gen[op];
319}
320
321static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
322{
323    static const X86OpEntry opcodes_group12[8] = {
324        {},
325        {},
326        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
327        {},
328        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
329        {},
330        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
331        {},
332    };
333
334    int op = (get_modrm(s, env) >> 3) & 7;
335    *entry = opcodes_group12[op];
336}
337
338static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
339{
340    static const X86OpEntry opcodes_group13[8] = {
341        {},
342        {},
343        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
344        {},
345        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
346        {},
347        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
348        {},
349    };
350
351    int op = (get_modrm(s, env) >> 3) & 7;
352    *entry = opcodes_group13[op];
353}
354
355static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
356{
357    static const X86OpEntry opcodes_group14[8] = {
358        /* grp14 */
359        {},
360        {},
361        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
362        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
363        {},
364        {},
365        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
366        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
367    };
368
369    int op = (get_modrm(s, env) >> 3) & 7;
370    *entry = opcodes_group14[op];
371}
372
373static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
374{
375    static const X86OpEntry opcodes_0F6F[4] = {
376        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
377        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
378        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
379        {},
380    };
381    *entry = *decode_by_prefix(s, opcodes_0F6F);
382}
383
384static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
385{
386    static const X86OpEntry pshufw[4] = {
387        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
388        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
389        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
390        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
391    };
392
393    *entry = *decode_by_prefix(s, pshufw);
394}
395
396static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
397{
398    if (!(s->prefix & PREFIX_VEX)) {
399        entry->gen = gen_EMMS;
400    } else if (!s->vex_l) {
401        entry->gen = gen_VZEROUPPER;
402        entry->vex_class = 8;
403    } else {
404        entry->gen = gen_VZEROALL;
405        entry->vex_class = 8;
406    }
407}
408
409static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
410{
411    static const X86OpEntry opcodes_0F78[4] = {
412        {},
413        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
414        {},
415        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
416    };
417    *entry = *decode_by_prefix(s, opcodes_0F78);
418}
419
420static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
421{
422    if (s->prefix & PREFIX_REPNZ) {
423        entry->gen = gen_INSERTQ_r; /* AMD extension */
424    } else if (s->prefix & PREFIX_DATA) {
425        entry->gen = gen_EXTRQ_r; /* AMD extension */
426    } else {
427        entry->gen = NULL;
428    };
429}
430
431static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
432{
433    static const X86OpEntry opcodes_0F7E[4] = {
434        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
435        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
436        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
437        {},
438    };
439    *entry = *decode_by_prefix(s, opcodes_0F7E);
440}
441
442static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
443{
444    static const X86OpEntry opcodes_0F7F[4] = {
445        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
446        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
447        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
448        {},
449    };
450    *entry = *decode_by_prefix(s, opcodes_0F7F);
451}
452
453static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
454{
455    static const X86OpEntry popcnt =
456        X86_OP_ENTRYwr(POPCNT,    G,v, E,v,  cpuid(POPCNT) zextT0);
457
458    if (s->prefix & PREFIX_REPZ) {
459        *entry = popcnt;
460    } else {
461        memset(entry, 0, sizeof(*entry));
462    }
463}
464
465static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
466{
467    /* For BSF, pass 2op as the third operand so that we can use zextT0 */
468    static const X86OpEntry opcodes_0FBC[4] = {
469        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0),
470        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
471        X86_OP_ENTRYwr(TZCNT, G,v, E,v,        zextT0), /* 0xf3 */
472        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
473    };
474    if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
475        *entry = opcodes_0FBC[0];
476    } else {
477        *entry = *decode_by_prefix(s, opcodes_0FBC);
478    }
479}
480
481static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
482{
483    /* For BSR, pass 2op as the third operand so that we can use zextT0 */
484    static const X86OpEntry opcodes_0FBD[4] = {
485        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0),
486        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
487        X86_OP_ENTRYwr(LZCNT, G,v, E,v,        zextT0), /* 0xf3 */
488        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
489    };
490    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
491        *entry = opcodes_0FBD[0];
492    } else {
493        *entry = *decode_by_prefix(s, opcodes_0FBD);
494    }
495}
496
497static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
498{
499    static const X86OpEntry movq[4] = {
500        {},
501        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
502        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
503        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
504    };
505
506    *entry = *decode_by_prefix(s, movq);
507}
508
509static const X86OpEntry opcodes_0F38_00toEF[240] = {
510    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
511    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
512    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
513    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
514    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
515    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
516    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
517    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
518
519    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
520    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
521    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
522    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
523    /* Listed incorrectly as type 4 */
524    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
525    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),
526
527    /*
528     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
529     * as 128-bit only in 2-17.
530     */
531    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
532    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
533    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
534    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
535    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
536    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
537
538    /* Same as PMOVSX.  */
539    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
540    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
541    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
542    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
543    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
544    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
545    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
546    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),
547
548    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
549    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
550    /* Listed incorrectly as type 4 */
551    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
552    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
553    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
554
555    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
556    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
557    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
558    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
559
560    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
561    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
562    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
563
564    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
565    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
566
567    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
568    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
569
570    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
571    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
572    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
573    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
574    /* Listed incorrectly as type 4 */
575    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
576    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
577    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
578    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
579
580    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
581    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
582    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
583    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
584    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
585    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
586
587    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
588    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
589    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
590    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
591    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
592    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
593    /* Incorrectly listed as Mx,Hx,Vx in the manual */
594    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
595    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
596
597    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
598    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
599    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
600    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
601    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
602    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
603    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
604    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
605
606    /* VPBROADCASTQ not listed as W0 in table 2-16 */
607    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
608    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
609    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),
610
611    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
612    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),
613
614    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
615    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),
616
617    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
618    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
619    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
620    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
621    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
622    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
623    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
624    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
625    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
626
627    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
628    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
629    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
630    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
631    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
632    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
633    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
634    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
635
636    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
637    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
638    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
639    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
640    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
641    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
642    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
643    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
644
645    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
646    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
647    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
648    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
649    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
650    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),
651
652    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
653    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
654    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
655    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
656    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
657
658    /*
659     * REG selects srcdest2 operand, VEX.vvvv selects src3.  VEX class not found
660     * in manual, assumed to be 13 from the VEX.L0 constraint.
661     */
662    [0xe0] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
663    [0xe1] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
664    [0xe2] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
665    [0xe3] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
666    [0xe4] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
667    [0xe5] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
668    [0xe6] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
669    [0xe7] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
670
671    [0xe8] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
672    [0xe9] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
673    [0xea] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
674    [0xeb] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
675    [0xec] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
676    [0xed] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
677    [0xee] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
678    [0xef] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
679};
680
681/* five rows for no prefix, 66, F3, F2, 66+F2  */
682static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
683    [0] = {
684        X86_OP_ENTRYwr(MOVBE, G,y, M,y, cpuid(MOVBE)),
685        X86_OP_ENTRYwr(MOVBE, G,w, M,w, cpuid(MOVBE)),
686        {},
687        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
688        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
689    },
690    [1] = {
691        X86_OP_ENTRYwr(MOVBE, M,y, G,y, cpuid(MOVBE)),
692        X86_OP_ENTRYwr(MOVBE, M,w, G,w, cpuid(MOVBE)),
693        {},
694        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
695        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
696    },
697    [2] = {
698        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
699        {},
700        {},
701        {},
702        {},
703    },
704    [3] = {
705        X86_OP_GROUP3(group17, B,y, None,None, E,y, vex13 cpuid(BMI1)),
706        {},
707        {},
708        {},
709        {},
710    },
711    [5] = {
712        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
713        {},
714        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
715        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
716        {},
717    },
718    [6] = {
719        {},
720        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
721        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
722        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
723        {},
724    },
725    [7] = {
726        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
727        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
728        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)),
729        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
730        {},
731    },
732};
733
734static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
735{
736    *b = x86_ldub_code(env, s);
737    if (*b < 0xf0) {
738        *entry = opcodes_0F38_00toEF[*b];
739    } else {
740        int row = 0;
741        if (s->prefix & PREFIX_REPZ) {
742            /* The REPZ (F3) prefix has priority over 66 */
743            row = 2;
744        } else {
745            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
746            row += s->prefix & PREFIX_DATA ? 1 : 0;
747        }
748        *entry = opcodes_0F38_F0toFF[*b & 15][row];
749    }
750}
751
752static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
753{
754    static const X86OpEntry
755        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
756        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
757
758    int modrm = get_modrm(s, env);
759    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
760}
761
762static const X86OpEntry opcodes_0F3A[256] = {
763    /*
764     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
765     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
766     * only.
767     */
768    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
769    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
770    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
771    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
772    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
773    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
774
775    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
776    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
777    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
778    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
779    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),
780
781    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) op2_Ry p_66),
782    [0x21] = X86_OP_GROUP0(VINSERTPS),
783    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),
784
785    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
786    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
787    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
788    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
789    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
790
791    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
792    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
793    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
794    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
795
796    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
797    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
798    /*
799     * Not listed as four operand in the manual.  Also writes and reads 128-bits
800     * from the first two operands due to the V operand picking higher entries of
801     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
802     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
803     * value of vex_special, because the table lists the operand types of VSQRTPx.
804     */
805    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
806    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
807    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
808    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
809    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
810    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
811
812    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
813    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),
814
815    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
816    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),
817
818    /* Listed incorrectly as type 4 */
819    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
820    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
821    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),
822
823    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),
824
825    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),
826
827    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
828};
829
830static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
831{
832    *b = x86_ldub_code(env, s);
833    *entry = opcodes_0F3A[*b];
834}
835
836/*
837 * There are some mistakes in the operands in the manual, and the load/store/register
838 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
839 * efficiency of implementation rather than copying what the manual says.
840 *
841 * In particular:
842 *
843 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
844 * but this is not mentioned in the tables.
845 *
846 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
847 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
848 * quadword of the V operand.
849 */
850static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
851{
852    static const X86OpEntry opcodes_0F10_reg[4] = {
853        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
854        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
855        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
856        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
857    };
858
859    static const X86OpEntry opcodes_0F10_mem[4] = {
860        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
861        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
862        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
863        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
864    };
865
866    if ((get_modrm(s, env) >> 6) == 3) {
867        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
868    } else {
869        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
870    }
871}
872
873static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
874{
875    static const X86OpEntry opcodes_0F11_reg[4] = {
876        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
877        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
878        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
879        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
880    };
881
882    static const X86OpEntry opcodes_0F11_mem[4] = {
883        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
884        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
885        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
886        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
887    };
888
889    if ((get_modrm(s, env) >> 6) == 3) {
890        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
891    } else {
892        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
893    }
894}
895
896static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
897{
898    static const X86OpEntry opcodes_0F12_mem[4] = {
899        /*
900         * Use dq for operand for compatibility with gen_MOVSD and
901         * to allow VEX128 only.
902         */
903        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
904        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
905        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
906        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
907    };
908    static const X86OpEntry opcodes_0F12_reg[4] = {
909        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
910        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
911        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
912        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
913    };
914
915    if ((get_modrm(s, env) >> 6) == 3) {
916        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
917    } else {
918        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
919        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
920            entry->s2 = X86_SIZE_qq;
921        }
922    }
923}
924
925static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
926{
927    static const X86OpEntry opcodes_0F16_mem[4] = {
928        /*
929         * Operand 1 technically only reads the low 64 bits, but uses dq so that
930         * it is easier to check for op0 == op1 in an endianness-neutral manner.
931         */
932        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
933        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
934        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
935        {},
936    };
937    static const X86OpEntry opcodes_0F16_reg[4] = {
938        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
939        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
940        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
941        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
942        {},
943    };
944
945    if ((get_modrm(s, env) >> 6) == 3) {
946        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
947    } else {
948        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
949    }
950}
951
952static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
953{
954    static const X86OpEntry opcodes_0F2A[4] = {
955        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
956        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
957        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
958        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
959    };
960    *entry = *decode_by_prefix(s, opcodes_0F2A);
961}
962
963static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
964{
965    static const X86OpEntry opcodes_0F2B[4] = {
966        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
967        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
968        /* AMD extensions */
969        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
970        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
971    };
972
973    *entry = *decode_by_prefix(s, opcodes_0F2B);
974}
975
976static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
977{
978    static const X86OpEntry opcodes_0F2C[4] = {
979        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
980        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
981        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
982        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
983        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
984    };
985    *entry = *decode_by_prefix(s, opcodes_0F2C);
986}
987
988static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
989{
990    static const X86OpEntry opcodes_0F2D[4] = {
991        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
992        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
993        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
994        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
995        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
996    };
997    *entry = *decode_by_prefix(s, opcodes_0F2D);
998}
999
1000static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1001{
1002    /*
1003     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
1004     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
1005     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
1006     */
1007    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
1008    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
1009}
1010
1011static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1012{
1013    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
1014        entry->op1 = X86_TYPE_None;
1015        entry->s1 = X86_SIZE_None;
1016    }
1017    switch (*b) {
1018    case 0x51: entry->gen = gen_VSQRT; break;
1019    case 0x52: entry->gen = gen_VRSQRT; break;
1020    case 0x53: entry->gen = gen_VRCP; break;
1021    }
1022}
1023
1024static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1025{
1026    static const X86OpEntry opcodes_0F5A[4] = {
1027        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
1028        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
1029        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
1030        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
1031    };
1032    *entry = *decode_by_prefix(s, opcodes_0F5A);
1033}
1034
1035static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1036{
1037    static const X86OpEntry opcodes_0F5B[4] = {
1038        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
1039        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
1040        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
1041        {},
1042    };
1043    *entry = *decode_by_prefix(s, opcodes_0F5B);
1044}
1045
1046static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1047{
1048    static const X86OpEntry opcodes_0FE6[4] = {
1049        {},
1050        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
1051        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
1052        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
1053    };
1054    *entry = *decode_by_prefix(s, opcodes_0FE6);
1055}
1056
1057/*
1058 * These ignore the mod bits (assume (modrm&0xc0)==0xc0), so group the
1059 * pre-decode tweak here for all MOVs from/to CR and DR.
1060 *
1061 * AMD documentation (24594.pdf) and testing of Intel 386 and 486
1062 * processors all show that the mod bits are assumed to be 1's,
1063 * regardless of actual values.
1064 */
1065static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1066{
1067    /*
1068     */
1069    get_modrm(s, env);
1070    s->modrm |= 0xC0;
1071
1072    entry->gen = gen_MOV;
1073}
1074
1075static const X86OpEntry opcodes_0F[256] = {
1076    [0x02] = X86_OP_ENTRYwr(LAR,        G,v, E,w,             chk(prot)),
1077    [0x03] = X86_OP_ENTRYwr(LSL,        G,v, E,w,             chk(prot)),
1078    [0x05] = X86_OP_ENTRY0(SYSCALL,                           chk(o64_intel)),
1079    [0x06] = X86_OP_ENTRY0(CLTS,                              chk(cpl0) svm(WRITE_CR0)),
1080    [0x07] = X86_OP_ENTRY0(SYSRET,                            chk3(o64_intel, prot, cpl0)),
1081
1082    [0x10] = X86_OP_GROUP0(0F10),
1083    [0x11] = X86_OP_GROUP0(0F11),
1084    [0x12] = X86_OP_GROUP0(0F12),
1085    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
1086    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
1087    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
1088    [0x16] = X86_OP_GROUP0(0F16),
1089    /* Incorrectly listed as Mq,Vq in the manual */
1090    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
1091
1092    /*
1093     * Incorrectly listed as using "d" operand type in the manual.  In reality
1094     * there's no 16-bit version (like y) and it does not use REX.W (like d64).
1095     */
1096    [0x20] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, C,y_d64, chk(cpl0) svm(READ_CR0)),
1097    [0x21] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, D,y_d64, chk(cpl0) svm(READ_DR0)),
1098    [0x22] = X86_OP_GROUPwr(MOV_CR_DR,   C,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_CR0)),
1099    [0x23] = X86_OP_GROUPwr(MOV_CR_DR,   D,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_DR0)),
1100
1101    [0x30] = X86_OP_ENTRY0(WRMSR,                             chk(cpl0)),
1102    [0x31] = X86_OP_ENTRY0(RDTSC),
1103    [0x32] = X86_OP_ENTRY0(RDMSR,                             chk(cpl0)),
1104    [0x33] = X86_OP_ENTRY0(RDPMC),
1105    [0x34] = X86_OP_ENTRY0(SYSENTER,                          chk2(i64_amd, prot_or_vm86)),
1106    [0x35] = X86_OP_ENTRY0(SYSEXIT,                           chk3(i64_amd, prot, cpl0)),
1107
1108    [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1109    [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1110    [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1111    [0x43] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1112    [0x44] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1113    [0x45] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1114    [0x46] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1115    [0x47] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1116
1117    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
1118    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
1119    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
1120    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
1121    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
1122    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
1123    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
1124    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
1125
1126    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1127    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1128    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1129    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1130    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1131    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1132    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1133    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1134
1135    [0x70] = X86_OP_GROUP0(0F70),
1136    [0x71] = X86_OP_GROUP0(group12),
1137    [0x72] = X86_OP_GROUP0(group13),
1138    [0x73] = X86_OP_GROUP0(group14),
1139    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1140    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1141    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1142    [0x77] = X86_OP_GROUP0(0F77),
1143
1144    [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
1145    [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
1146    [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
1147    [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
1148    [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
1149    [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
1150    [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
1151    [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),
1152
1153    [0x90] = X86_OP_ENTRYw(SETcc, E,b),
1154    [0x91] = X86_OP_ENTRYw(SETcc, E,b),
1155    [0x92] = X86_OP_ENTRYw(SETcc, E,b),
1156    [0x93] = X86_OP_ENTRYw(SETcc, E,b),
1157    [0x94] = X86_OP_ENTRYw(SETcc, E,b),
1158    [0x95] = X86_OP_ENTRYw(SETcc, E,b),
1159    [0x96] = X86_OP_ENTRYw(SETcc, E,b),
1160    [0x97] = X86_OP_ENTRYw(SETcc, E,b),
1161
1162    [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
1163    [0xa1] = X86_OP_ENTRYw(POP, FS, w),
1164    [0xa2] = X86_OP_ENTRY0(CPUID),
1165    [0xa4] = X86_OP_ENTRY4(SHLD,  E,v, 2op,v, G,v),
1166    [0xa5] = X86_OP_ENTRY3(SHLD,  E,v, 2op,v, G,v),
1167
1168    [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
1169    [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
1170    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
1171    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
1172    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
1173    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
1174    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
1175
1176    [0xc0] = X86_OP_ENTRY2(XADD,       E,b, G,b,            lock),
1177    [0xc1] = X86_OP_ENTRY2(XADD,       E,v, G,v,            lock),
1178    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
1179    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
1180    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
1181    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
1182    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
1183
1184    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
1185    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1186    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1187    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1188    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1189    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1190    [0xd6] = X86_OP_GROUP0(0FD6),
1191    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1192
1193    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1194    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1195    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1196    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1197    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1198    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1199    [0xe6] = X86_OP_GROUP0(0FE6),
1200    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1201
1202    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1203    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1204    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1205    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1206    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1207    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1208    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1209    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1210
1211    [0x08] = X86_OP_ENTRY0(NOP,           svm(INVD)),
1212    [0x09] = X86_OP_ENTRY0(NOP,           svm(WBINVD)),
1213    [0x0b] = X86_OP_ENTRY0(UD),           /* UD2 */
1214    [0x0d] = X86_OP_ENTRY1(NOP,  M,v),    /* 3DNow! prefetch */
1215    [0x0e] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
1216    /*
1217     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
1218     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
1219     * function.
1220     */
1221    [0x0f] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
1222
1223    [0x18] = X86_OP_ENTRY1(NOP,  nop,v),  /* prefetch/reserved NOP */
1224    [0x19] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1225    [0x1c] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1226    [0x1d] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1227    [0x1e] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1228    [0x1f] = X86_OP_ENTRY1(NOP,  nop,v),  /* NOP/reserved NOP */
1229
1230    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
1231    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
1232    [0x2A] = X86_OP_GROUP0(0F2A),
1233    [0x2B] = X86_OP_GROUP0(0F2B),
1234    [0x2C] = X86_OP_GROUP0(0F2C),
1235    [0x2D] = X86_OP_GROUP0(0F2D),
1236    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
1237    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */
1238
1239    [0x38] = X86_OP_GROUP0(0F38),
1240    [0x3a] = X86_OP_GROUP0(0F3A),
1241
1242    [0x48] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1243    [0x49] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1244    [0x4a] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1245    [0x4b] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1246    [0x4c] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1247    [0x4d] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1248    [0x4e] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1249    [0x4f] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1250
1251    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1252    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1253    [0x5a] = X86_OP_GROUP0(0F5A),
1254    [0x5b] = X86_OP_GROUP0(0F5B),
1255    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1256    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1257    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1258    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1259
1260    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1261    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1262    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1263    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1264    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1265    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1266    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
1267    [0x6f] = X86_OP_GROUP0(0F6F),
1268
1269    [0x78] = X86_OP_GROUP0(0F78),
1270    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
1271    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1272    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1273    [0x7e] = X86_OP_GROUP0(0F7E),
1274    [0x7f] = X86_OP_GROUP0(0F7F),
1275
1276    [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
1277    [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
1278    [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
1279    [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
1280    [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
1281    [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
1282    [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
1283    [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),
1284
1285    [0x98] = X86_OP_ENTRYw(SETcc, E,b),
1286    [0x99] = X86_OP_ENTRYw(SETcc, E,b),
1287    [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
1288    [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
1289    [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
1290    [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
1291    [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
1292    [0x9f] = X86_OP_ENTRYw(SETcc, E,b),
1293
1294    [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
1295    [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
1296    [0xaa] = X86_OP_ENTRY0(RSM,             chk(smm) svm(RSM)),
1297    [0xac] = X86_OP_ENTRY4(SHRD,   E,v, 2op,v, G,v),
1298    [0xad] = X86_OP_ENTRY3(SHRD,   E,v, 2op,v, G,v),
1299    [0xae] = X86_OP_GROUP0(group15),
1300    /*
1301     * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
1302     * to assume sextT0.  Multiplication is commutative anyway.
1303     */
1304    [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
1305
1306    [0xb8] = X86_OP_GROUP0(0FB8),
1307    /* decoded as modrm, which is visible as a difference between page fault and #UD */
1308    [0xb9] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD1 */
1309    [0xbc] = X86_OP_GROUP0(0FBC),
1310    [0xbd] = X86_OP_GROUP0(0FBD),
1311    [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
1312    [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
1313
1314    [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1315    [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1316    [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1317    [0xcb] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1318    [0xcc] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1319    [0xcd] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1320    [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1321    [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1322
1323    /* Incorrectly missing from 2-17 */
1324    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1325    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1326    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1327    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1328    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1329    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1330    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1331    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1332
1333    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1334    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1335    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1336    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1337    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1338    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1339    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1340    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1341
1342    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1343    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1344    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1345    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1346    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1347    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1348    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1349    [0xff] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD0 */
1350};
1351
1352static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1353{
1354    *entry = opcodes_0F[*b];
1355}
1356
1357static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1358{
1359    *b = x86_ldub_code(env, s);
1360    do_decode_0F(s, env, entry, b);
1361}
1362
1363static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1364{
1365    static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
1366    static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
1367    static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0);
1368    if (!CODE64(s)) {
1369        *entry = arpl;
1370    } else if (REX_W(s)) {
1371        *entry = movsxd;
1372    } else {
1373        *entry = mov;
1374    }
1375}
1376
1377static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1378{
1379    static const X86GenFunc group1_gen[8] = {
1380        gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB,
1381    };
1382    int op = (get_modrm(s, env) >> 3) & 7;
1383    entry->gen = group1_gen[op];
1384
1385    if (op == 7) {
1386        /* prevent writeback for CMP */
1387        entry->op1 = entry->op0;
1388        entry->op0 = X86_TYPE_None;
1389        entry->s0 = X86_SIZE_None;
1390    } else {
1391        entry->special = X86_SPECIAL_HasLock;
1392    }
1393}
1394
1395static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1396{
1397    int op = (get_modrm(s, env) >> 3) & 7;
1398    if (op != 0) {
1399        /* could be XOP prefix too */
1400        *entry = UNKNOWN_OPCODE;
1401    } else {
1402        entry->gen = gen_POP;
1403        /* The address must use the value of ESP after the pop.  */
1404        s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag);
1405    }
1406}
1407
1408static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1409{
1410    static const X86GenFunc group2_gen[8] = {
1411        gen_ROL, gen_ROR, gen_RCL, gen_RCR,
1412        gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
1413    };
1414    int op = (get_modrm(s, env) >> 3) & 7;
1415    entry->gen = group2_gen[op];
1416    if (op == 7) {
1417        entry->special = X86_SPECIAL_SExtT0;
1418    } else {
1419        entry->special = X86_SPECIAL_ZExtT0;
1420    }
1421}
1422
1423static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1424{
1425    static const X86OpEntry opcodes_grp3[16] = {
1426        /* 0xf6 */
1427        [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),
1428        [0x02] = X86_OP_ENTRY1(NOT,  E,b,      lock),
1429        [0x03] = X86_OP_ENTRY1(NEG,  E,b,      lock),
1430        [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
1431        [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
1432        [0x06] = X86_OP_ENTRYr(DIV,  E,b),
1433        [0x07] = X86_OP_ENTRYr(IDIV, E,b),
1434
1435        /* 0xf7 */
1436        [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),
1437        [0x0a] = X86_OP_ENTRY1(NOT,  E,v,      lock),
1438        [0x0b] = X86_OP_ENTRY1(NEG,  E,v,      lock),
1439        [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
1440        [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
1441        [0x0e] = X86_OP_ENTRYr(DIV,  E,v),
1442        [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
1443    };
1444
1445    int w = (*b & 1);
1446    int reg = (get_modrm(s, env) >> 3) & 7;
1447
1448    *entry = opcodes_grp3[(w << 3) | reg];
1449}
1450
1451static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1452{
1453    static const X86OpEntry opcodes_grp4_5[16] = {
1454        /* 0xfe */
1455        [0x00] = X86_OP_ENTRY1(INC,     E,b,                           lock),
1456        [0x01] = X86_OP_ENTRY1(DEC,     E,b,                           lock),
1457
1458        /* 0xff */
1459        [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
1460        [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
1461        [0x0a] = X86_OP_ENTRYr(CALL_m,  E,f64,                         zextT0),
1462        [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
1463        [0x0c] = X86_OP_ENTRYr(JMP_m,   E,f64,                         zextT0),
1464        [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
1465        [0x0e] = X86_OP_ENTRYr(PUSH,    E,f64),
1466    };
1467
1468    int w = (*b & 1);
1469    int reg = (get_modrm(s, env) >> 3) & 7;
1470
1471    *entry = opcodes_grp4_5[(w << 3) | reg];
1472}
1473
1474
1475static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1476{
1477    int op = (get_modrm(s, env) >> 3) & 7;
1478    if (op != 0) {
1479        *entry = UNKNOWN_OPCODE;
1480    } else {
1481        entry->gen = gen_MOV;
1482    }
1483}
1484
1485static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1486{
1487    static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
1488    static X86OpEntry nop = X86_OP_ENTRY0(NOP);
1489    static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
1490
1491    if (REX_B(s)) {
1492        *entry = xchg_ax;
1493    } else {
1494        *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
1495    }
1496}
1497
1498static const X86OpEntry opcodes_root[256] = {
1499    [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
1500    [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
1501    [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
1502    [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
1503    [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock),   /* AL, Ib */
1504    [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock),   /* rAX, Iz */
1505    [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
1506    [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
1507
1508    [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
1509    [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
1510    [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
1511    [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
1512    [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock),   /* AL, Ib */
1513    [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock),   /* rAX, Iz */
1514    [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
1515    [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
1516
1517    [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
1518    [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
1519    [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
1520    [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
1521    [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock),   /* AL, Ib */
1522    [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock),   /* rAX, Iz */
1523    [0x26] = {},
1524    [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
1525
1526    [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
1527    [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
1528    [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
1529    [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
1530    [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock),   /* AL, Ib */
1531    [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock),   /* rAX, Iz */
1532    [0x36] = {},
1533    [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
1534
1535    [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
1536    [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
1537    [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
1538    [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
1539    [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
1540    [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
1541    [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
1542    [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
1543
1544    [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1545    [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1546    [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1547    [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1548    [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1549    [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1550    [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1551    [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1552
1553    [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
1554    [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
1555    [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
1556    [0x63] = X86_OP_GROUP0(63),
1557    [0x64] = {},
1558    [0x65] = {},
1559    [0x66] = {},
1560    [0x67] = {},
1561
1562    [0x70] = X86_OP_ENTRYr(Jcc, J,b),
1563    [0x71] = X86_OP_ENTRYr(Jcc, J,b),
1564    [0x72] = X86_OP_ENTRYr(Jcc, J,b),
1565    [0x73] = X86_OP_ENTRYr(Jcc, J,b),
1566    [0x74] = X86_OP_ENTRYr(Jcc, J,b),
1567    [0x75] = X86_OP_ENTRYr(Jcc, J,b),
1568    [0x76] = X86_OP_ENTRYr(Jcc, J,b),
1569    [0x77] = X86_OP_ENTRYr(Jcc, J,b),
1570
1571    [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
1572    [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
1573    [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
1574    [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
1575    [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),
1576    [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),
1577    [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
1578    [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
1579
1580    [0x90] = X86_OP_GROUP0(90),
1581    [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1582    [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1583    [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1584    [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1585    [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1586    [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1587    [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1588
1589    [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
1590    [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
1591    [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
1592    [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
1593    [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
1594    [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
1595    [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
1596    [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
1597
1598    [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1599    [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1600    [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1601    [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1602    [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1603    [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1604    [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1605    [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1606
1607    [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
1608    [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
1609    [0xC2] = X86_OP_ENTRYr(RET, I,w),
1610    [0xC3] = X86_OP_ENTRY0(RET),
1611    [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)),
1612    [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, chk(i64)),
1613    [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
1614    [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
1615
1616    [0xD0] = X86_OP_GROUP1(group2, E,b),
1617    [0xD1] = X86_OP_GROUP1(group2, E,v),
1618    [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
1619    [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
1620    [0xD4] = X86_OP_ENTRY2(AAM, 0,w, I,b),
1621    [0xD5] = X86_OP_ENTRY2(AAD, 0,w, I,b),
1622    [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
1623    [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
1624
1625    [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
1626    [0xE1] = X86_OP_ENTRYr(LOOPE,  J,b), /* implicit: CX with aflag size */
1627    [0xE2] = X86_OP_ENTRYr(LOOP,   J,b), /* implicit: CX with aflag size */
1628    [0xE3] = X86_OP_ENTRYr(JCXZ,   J,b), /* implicit: CX with aflag size */
1629    [0xE4] = X86_OP_ENTRYwr(IN,    0,b, I_unsigned,b), /* AL */
1630    [0xE5] = X86_OP_ENTRYwr(IN,    0,v, I_unsigned,b), /* AX/EAX */
1631    [0xE6] = X86_OP_ENTRYrr(OUT,   0,b, I_unsigned,b), /* AL */
1632    [0xE7] = X86_OP_ENTRYrr(OUT,   0,v, I_unsigned,b), /* AX/EAX */
1633
1634    [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
1635    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0) svm(HLT)),
1636    [0xF5] = X86_OP_ENTRY0(CMC),
1637    [0xF6] = X86_OP_GROUP1(group3, E,b),
1638    [0xF7] = X86_OP_GROUP1(group3, E,v),
1639
1640    [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
1641    [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
1642    [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
1643    [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
1644    [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock),   /* AL, Ib */
1645    [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock),   /* rAX, Iz */
1646    [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
1647    [0x0F] = X86_OP_GROUP0(0F),
1648
1649    [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
1650    [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
1651    [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
1652    [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
1653    [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock),   /* AL, Ib */
1654    [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock),   /* rAX, Iz */
1655    [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
1656    [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
1657
1658    [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
1659    [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
1660    [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
1661    [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
1662    [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock),   /* AL, Ib */
1663    [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock),   /* rAX, Iz */
1664    [0x2E] = {},
1665    [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
1666
1667    [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
1668    [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
1669    [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
1670    [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
1671    [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b),   /* AL, Ib */
1672    [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z),   /* rAX, Iz */
1673    [0x3E] = {},
1674    [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
1675
1676    [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
1677    [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
1678    [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
1679    [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
1680    [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
1681    [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
1682    [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
1683    [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
1684
1685    [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
1686    [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
1687    [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
1688    [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
1689    [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
1690    [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
1691    [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
1692    [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
1693
1694    [0x68] = X86_OP_ENTRYr(PUSH, I,z),
1695    [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
1696    [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
1697    [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
1698    [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
1699    [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
1700    [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
1701    [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
1702
1703    [0x78] = X86_OP_ENTRYr(Jcc, J,b),
1704    [0x79] = X86_OP_ENTRYr(Jcc, J,b),
1705    [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
1706    [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
1707    [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
1708    [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
1709    [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
1710    [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
1711
1712    [0x88] = X86_OP_ENTRYwr(MOV, E,b, G,b),
1713    [0x89] = X86_OP_ENTRYwr(MOV, E,v, G,v),
1714    [0x8A] = X86_OP_ENTRYwr(MOV, G,b, E,b),
1715    [0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v),
1716     /* Missing in Table A-2: memory destination is always 16-bit.  */
1717    [0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
1718    [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea),
1719    [0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
1720    [0x8F] = X86_OP_GROUPw(group1A, E,d64),
1721
1722    [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
1723    [0x99] = X86_OP_ENTRYwr(CWD,   2,v, 0,v), /* rDX, rAX */
1724    [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
1725    [0x9B] = X86_OP_ENTRY0(WAIT),
1726    [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
1727    [0x9D] = X86_OP_ENTRY0(POPF,   chk(vm86_iopl) svm(POPF)),
1728    [0x9E] = X86_OP_ENTRY0(SAHF),
1729    [0x9F] = X86_OP_ENTRY0(LAHF),
1730
1731    [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* AL, Ib */
1732    [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* rAX, Iz */
1733    [0xAA] = X86_OP_ENTRYwr(STOS, Y,b, 0,b),
1734    [0xAB] = X86_OP_ENTRYwr(STOS, Y,v, 0,v),
1735    /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
1736    [0xAC] = X86_OP_ENTRYr(LODS, X,b),
1737    [0xAD] = X86_OP_ENTRYr(LODS, X,v),
1738    [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
1739    [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
1740
1741    [0xB8] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1742    [0xB9] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1743    [0xBA] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1744    [0xBB] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1745    [0xBC] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1746    [0xBD] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1747    [0xBE] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1748    [0xBF] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1749
1750    [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
1751    [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
1752    [0xCA] = X86_OP_ENTRYr(RETF,  I,w),
1753    [0xCB] = X86_OP_ENTRY0(RETF),
1754    [0xCC] = X86_OP_ENTRY0(INT3),
1755    [0xCD] = X86_OP_ENTRYr(INT, I,b,  chk(vm86_iopl)),
1756    [0xCE] = X86_OP_ENTRY0(INTO),
1757    [0xCF] = X86_OP_ENTRY0(IRET,      chk(vm86_iopl) svm(IRET)),
1758
1759    [0xE8] = X86_OP_ENTRYr(CALL,   J,z_f64),
1760    [0xE9] = X86_OP_ENTRYr(JMP,    J,z_f64),
1761    [0xEA] = X86_OP_ENTRYrr(JMPF,  I_unsigned,p, I_unsigned,w, chk(i64)),
1762    [0xEB] = X86_OP_ENTRYr(JMP,    J,b),
1763    [0xEC] = X86_OP_ENTRYwr(IN,    0,b, 2,w), /* AL, DX */
1764    [0xED] = X86_OP_ENTRYwr(IN,    0,v, 2,w), /* AX/EAX, DX */
1765    [0xEE] = X86_OP_ENTRYrr(OUT,   0,b, 2,w), /* DX, AL */
1766    [0xEF] = X86_OP_ENTRYrr(OUT,   0,v, 2,w), /* DX, AX/EAX */
1767
1768    [0xF8] = X86_OP_ENTRY0(CLC),
1769    [0xF9] = X86_OP_ENTRY0(STC),
1770    [0xFA] = X86_OP_ENTRY0(CLI,    chk(iopl)),
1771    [0xFB] = X86_OP_ENTRY0(STI,    chk(iopl)),
1772    [0xFC] = X86_OP_ENTRY0(CLD),
1773    [0xFD] = X86_OP_ENTRY0(STD),
1774    [0xFE] = X86_OP_GROUP1(group4_5, E,b),
1775    [0xFF] = X86_OP_GROUP1(group4_5, E,v),
1776};
1777
1778#undef mmx
1779#undef vex1
1780#undef vex2
1781#undef vex3
1782#undef vex4
1783#undef vex4_unal
1784#undef vex5
1785#undef vex6
1786#undef vex7
1787#undef vex8
1788#undef vex11
1789#undef vex12
1790#undef vex13
1791
1792/*
1793 * Decode the fixed part of the opcode and place the last
1794 * in b.
1795 */
1796static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1797{
1798    *entry = opcodes_root[*b];
1799}
1800
1801
1802static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1803                        X86DecodedOp *op, X86OpType type)
1804{
1805    int modrm = get_modrm(s, env);
1806    if ((modrm >> 6) == 3) {
1807        op->n = (modrm & 7);
1808        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1809            op->n |= REX_B(s);
1810        }
1811    } else {
1812        op->has_ea = true;
1813        op->n = -1;
1814        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1815    }
1816    return modrm;
1817}
1818
1819static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1820{
1821    switch (size) {
1822    case X86_SIZE_b:  /* byte */
1823        *ot = MO_8;
1824        return true;
1825
1826    case X86_SIZE_d:  /* 32-bit */
1827    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1828        *ot = MO_32;
1829        return true;
1830
1831    case X86_SIZE_p:  /* Far pointer, return offset size */
1832    case X86_SIZE_s:  /* Descriptor, return offset size */
1833    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1834        *ot = s->dflag;
1835        return true;
1836
1837    case X86_SIZE_pi: /* MMX */
1838    case X86_SIZE_q:  /* 64-bit */
1839    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1840        *ot = MO_64;
1841        return true;
1842
1843    case X86_SIZE_w:  /* 16-bit */
1844        *ot = MO_16;
1845        return true;
1846
1847    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1848        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1849        return true;
1850
1851    case X86_SIZE_y_d64:  /* Full (not 16-bit) register access */
1852        *ot = CODE64(s) ? MO_64 : MO_32;
1853        return true;
1854
1855    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1856        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1857        return true;
1858
1859    case X86_SIZE_z_f64:  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
1860        *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
1861        return true;
1862
1863    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1864        if (e->special == X86_SPECIAL_MMX &&
1865            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1866            *ot = MO_64;
1867            return true;
1868        }
1869        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1870            return false;
1871        }
1872        *ot = MO_128;
1873        return true;
1874
1875    case X86_SIZE_qq: /* AVX 256-bit */
1876        if (!s->vex_l) {
1877            return false;
1878        }
1879        *ot = MO_256;
1880        return true;
1881
1882    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1883        if (e->special == X86_SPECIAL_MMX &&
1884            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1885            *ot = MO_64;
1886            return true;
1887        }
1888        /* fall through */
1889    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1890    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1891        *ot = s->vex_l ? MO_256 : MO_128;
1892        return true;
1893
1894    case X86_SIZE_xh: /* SSE/AVX packed half register */
1895        *ot = s->vex_l ? MO_128 : MO_64;
1896        return true;
1897
1898    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1899        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1900        return true;
1901
1902    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1903        *ot = CODE64(s) ? MO_64 : s->dflag;
1904        return true;
1905
1906    default:
1907        *ot = -1;
1908        return true;
1909    }
1910}
1911
1912static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1913                      X86DecodedOp *op, X86OpType type, int b)
1914{
1915    int modrm;
1916
1917    switch (type) {
1918    case X86_TYPE_None:  /* Implicit or absent */
1919    case X86_TYPE_A:  /* Implicit */
1920    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1921    case X86_TYPE_X:  /* string source */
1922    case X86_TYPE_Y:  /* string destination */
1923        break;
1924
1925    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1926        op->unit = X86_OP_INT;
1927        op->n = s->vex_v;
1928        break;
1929
1930    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1931        op->unit = X86_OP_CR;
1932        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1933        if (op->n == 0 && (s->prefix & PREFIX_LOCK) &&
1934            (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
1935            op->n = 8;
1936            s->prefix &= ~PREFIX_LOCK;
1937        }
1938        if (op->n != 0 && op->n != 2 && op->n != 3 && op->n != 4 && op->n != 8) {
1939            return false;
1940        }
1941        if (decode->e.intercept) {
1942            decode->e.intercept += op->n;
1943        }
1944        break;
1945
1946    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1947        op->unit = X86_OP_DR;
1948        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1949        if (op->n >= 8) {
1950            /*
1951             * illegal opcode.  The DR4 and DR5 case is checked in the generated
1952             * code instead, to save on hflags bits.
1953             */
1954            return false;
1955        }
1956        if (decode->e.intercept) {
1957            decode->e.intercept += op->n;
1958        }
1959        break;
1960
1961    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1962        op->unit = X86_OP_INT;
1963        goto get_reg;
1964
1965    case X86_TYPE_S:  /* reg selects a segment register */
1966        op->unit = X86_OP_SEG;
1967        goto get_reg;
1968
1969    case X86_TYPE_P:
1970        op->unit = X86_OP_MMX;
1971        goto get_reg;
1972
1973    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1974        if (decode->e.special == X86_SPECIAL_MMX &&
1975            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1976            op->unit = X86_OP_MMX;
1977        } else {
1978            op->unit = X86_OP_SSE;
1979        }
1980    get_reg:
1981        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1982        break;
1983
1984    case X86_TYPE_E:  /* ALU modrm operand */
1985        op->unit = X86_OP_INT;
1986        goto get_modrm;
1987
1988    case X86_TYPE_Q:  /* MMX modrm operand */
1989        op->unit = X86_OP_MMX;
1990        goto get_modrm;
1991
1992    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1993        if (decode->e.special == X86_SPECIAL_MMX &&
1994            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1995            op->unit = X86_OP_MMX;
1996        } else {
1997            op->unit = X86_OP_SSE;
1998        }
1999        goto get_modrm;
2000
2001    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
2002        op->unit = X86_OP_MMX;
2003        goto get_modrm_reg;
2004
2005    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
2006        if (decode->e.special == X86_SPECIAL_MMX &&
2007            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
2008            op->unit = X86_OP_MMX;
2009        } else {
2010            op->unit = X86_OP_SSE;
2011        }
2012        goto get_modrm_reg;
2013
2014    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
2015        op->unit = X86_OP_INT;
2016    get_modrm_reg:
2017        modrm = get_modrm(s, env);
2018        if ((modrm >> 6) != 3) {
2019            return false;
2020        }
2021        goto get_modrm;
2022
2023    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
2024        op->unit = X86_OP_SSE;
2025        goto get_modrm_mem;
2026
2027    case X86_TYPE_EM:  /* modrm byte selects an ALU memory operand */
2028        op->unit = X86_OP_INT;
2029        /* fall through */
2030    case X86_TYPE_M:  /* modrm byte selects a memory operand */
2031    get_modrm_mem:
2032        modrm = get_modrm(s, env);
2033        if ((modrm >> 6) == 3) {
2034            return false;
2035        }
2036        /* fall through */
2037    case X86_TYPE_nop:  /* modrm operand decoded but not fetched */
2038    get_modrm:
2039        decode_modrm(s, env, decode, op, type);
2040        break;
2041
2042    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
2043        op->unit = X86_OP_INT;
2044        op->has_ea = true;
2045        op->n = -1;
2046        decode->mem = (AddressParts) {
2047            .def_seg = R_DS,
2048            .base = -1,
2049            .index = -1,
2050            .disp = insn_get_addr(env, s, s->aflag)
2051        };
2052        break;
2053
2054    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
2055        if ((s->prefix & PREFIX_VEX)) {
2056            op->unit = X86_OP_SSE;
2057            op->n = s->vex_v;
2058            break;
2059        }
2060        if (op == &decode->op[0]) {
2061            /* shifts place the destination in VEX.vvvv, use modrm */
2062            return decode_op(s, env, decode, op, decode->e.op1, b);
2063        } else {
2064            return decode_op(s, env, decode, op, decode->e.op0, b);
2065        }
2066
2067    case X86_TYPE_I:  /* Immediate */
2068    case X86_TYPE_J:  /* Relative offset for a jump */
2069        op->unit = X86_OP_IMM;
2070        decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
2071        break;
2072
2073    case X86_TYPE_I_unsigned:  /* Immediate */
2074        op->unit = X86_OP_IMM;
2075        decode->immediate = op->imm = insn_get(env, s, op->ot);
2076        break;
2077
2078    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
2079        op->n = insn_get(env, s, op->ot) >> 4;
2080        break;
2081
2082    case X86_TYPE_2op:
2083        *op = decode->op[0];
2084        break;
2085
2086    case X86_TYPE_LoBits:
2087        op->n = (b & 7) | REX_B(s);
2088        op->unit = X86_OP_INT;
2089        break;
2090
2091    case X86_TYPE_0 ... X86_TYPE_7:
2092        op->n = type - X86_TYPE_0;
2093        op->unit = X86_OP_INT;
2094        break;
2095
2096    case X86_TYPE_ES ... X86_TYPE_GS:
2097        op->n = type - X86_TYPE_ES;
2098        op->unit = X86_OP_SEG;
2099        break;
2100    }
2101
2102    return true;
2103}
2104
2105static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
2106{
2107    uint16_t sse_prefixes;
2108
2109    if (!e->valid_prefix) {
2110        return true;
2111    }
2112    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2113        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
2114        s->prefix &= ~PREFIX_DATA;
2115    }
2116
2117    /* Now, either zero or one bit is set in sse_prefixes.  */
2118    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
2119    return e->valid_prefix & (1 << sse_prefixes);
2120}
2121
2122static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
2123                        X86DecodedInsn *decode)
2124{
2125    X86OpEntry *e = &decode->e;
2126
2127    decode_func(s, env, e, &decode->b);
2128    while (e->is_decode) {
2129        e->is_decode = false;
2130        e->decode(s, env, e, &decode->b);
2131    }
2132
2133    if (!validate_sse_prefix(s, e)) {
2134        return false;
2135    }
2136
2137    /* First compute size of operands in order to initialize s->rip_offset.  */
2138    if (e->op0 != X86_TYPE_None) {
2139        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
2140            return false;
2141        }
2142        if (e->op0 == X86_TYPE_I) {
2143            s->rip_offset += 1 << decode->op[0].ot;
2144        }
2145    }
2146    if (e->op1 != X86_TYPE_None) {
2147        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
2148            return false;
2149        }
2150        if (e->op1 == X86_TYPE_I) {
2151            s->rip_offset += 1 << decode->op[1].ot;
2152        }
2153    }
2154    if (e->op2 != X86_TYPE_None) {
2155        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
2156            return false;
2157        }
2158        if (e->op2 == X86_TYPE_I) {
2159            s->rip_offset += 1 << decode->op[2].ot;
2160        }
2161    }
2162    if (e->op3 != X86_TYPE_None) {
2163        /*
2164         * A couple instructions actually use the extra immediate byte for an Lx
2165         * register operand; those are handled in the gen_* functions as one off.
2166         */
2167        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
2168        s->rip_offset += 1;
2169    }
2170
2171    if (e->op0 != X86_TYPE_None &&
2172        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
2173        return false;
2174    }
2175
2176    if (e->op1 != X86_TYPE_None &&
2177        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
2178        return false;
2179    }
2180
2181    if (e->op2 != X86_TYPE_None &&
2182        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
2183        return false;
2184    }
2185
2186    if (e->op3 != X86_TYPE_None) {
2187        decode->immediate = insn_get_signed(env, s, MO_8);
2188    }
2189
2190    return true;
2191}
2192
2193static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
2194{
2195    switch (cpuid) {
2196    case X86_FEAT_None:
2197        return true;
2198    case X86_FEAT_CMOV:
2199        return (s->cpuid_features & CPUID_CMOV);
2200    case X86_FEAT_CLFLUSH:
2201        return (s->cpuid_features & CPUID_CLFLUSH);
2202    case X86_FEAT_FXSR:
2203        return (s->cpuid_features & CPUID_FXSR);
2204    case X86_FEAT_F16C:
2205        return (s->cpuid_ext_features & CPUID_EXT_F16C);
2206    case X86_FEAT_FMA:
2207        return (s->cpuid_ext_features & CPUID_EXT_FMA);
2208    case X86_FEAT_MOVBE:
2209        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
2210    case X86_FEAT_PCLMULQDQ:
2211        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
2212    case X86_FEAT_POPCNT:
2213        return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
2214    case X86_FEAT_SSE:
2215        return (s->cpuid_features & CPUID_SSE);
2216    case X86_FEAT_SSE2:
2217        return (s->cpuid_features & CPUID_SSE2);
2218    case X86_FEAT_SSE3:
2219        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
2220    case X86_FEAT_SSSE3:
2221        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
2222    case X86_FEAT_SSE41:
2223        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
2224    case X86_FEAT_SSE42:
2225        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
2226    case X86_FEAT_AES:
2227        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
2228            return false;
2229        } else if (!(s->prefix & PREFIX_VEX)) {
2230            return true;
2231        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
2232            return false;
2233        } else {
2234            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
2235        }
2236
2237    case X86_FEAT_AVX:
2238        return (s->cpuid_ext_features & CPUID_EXT_AVX);
2239    case X86_FEAT_XSAVE:
2240        return (s->cpuid_ext_features & CPUID_EXT_XSAVE);
2241
2242    case X86_FEAT_3DNOW:
2243        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
2244    case X86_FEAT_SSE4A:
2245        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
2246
2247    case X86_FEAT_ADX:
2248        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
2249    case X86_FEAT_BMI1:
2250        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
2251    case X86_FEAT_BMI2:
2252        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
2253    case X86_FEAT_AVX2:
2254        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
2255    case X86_FEAT_CLFLUSHOPT:
2256        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT);
2257    case X86_FEAT_CLWB:
2258        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB);
2259    case X86_FEAT_FSGSBASE:
2260        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE);
2261    case X86_FEAT_SHA_NI:
2262        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
2263
2264    case X86_FEAT_CMPCCXADD:
2265        return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
2266
2267    case X86_FEAT_XSAVEOPT:
2268        return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT);
2269    }
2270    g_assert_not_reached();
2271}
2272
2273static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
2274{
2275    X86OpEntry *e = &decode->e;
2276
2277    switch (e->vex_special) {
2278    case X86_VEX_REPScalar:
2279        /*
2280         * Instructions which differ between 00/66 and F2/F3 in the
2281         * exception classification and the size of the memory operand.
2282         */
2283        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
2284        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2285            e->vex_class = e->vex_class < 4 ? 3 : 5;
2286            if (s->vex_l) {
2287                goto illegal;
2288            }
2289            assert(decode->e.s2 == X86_SIZE_x);
2290            if (decode->op[2].has_ea) {
2291                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
2292            }
2293        }
2294        break;
2295
2296    case X86_VEX_SSEUnaligned:
2297        /* handled in sse_needs_alignment.  */
2298        break;
2299
2300    case X86_VEX_AVX2_256:
2301        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
2302            goto illegal;
2303        }
2304    }
2305
2306    switch (e->vex_class) {
2307    case 0:
2308        if (s->prefix & PREFIX_VEX) {
2309            goto illegal;
2310        }
2311        return true;
2312    case 1:
2313    case 2:
2314    case 3:
2315    case 4:
2316    case 5:
2317    case 7:
2318        if (s->prefix & PREFIX_VEX) {
2319            if (!(s->flags & HF_AVX_EN_MASK)) {
2320                goto illegal;
2321            }
2322        } else if (e->special != X86_SPECIAL_MMX ||
2323                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2324            if (!(s->flags & HF_OSFXSR_MASK)) {
2325                goto illegal;
2326            }
2327        }
2328        break;
2329    case 12:
2330        /* Must have a VSIB byte and no address prefix.  */
2331        assert(s->has_modrm);
2332        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
2333            goto illegal;
2334        }
2335
2336        /* Check no overlap between registers.  */
2337        if (!decode->op[0].has_ea &&
2338            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
2339            goto illegal;
2340        }
2341        assert(!decode->op[1].has_ea);
2342        if (decode->op[1].n == decode->mem.index) {
2343            goto illegal;
2344        }
2345        if (!decode->op[2].has_ea &&
2346            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
2347            goto illegal;
2348        }
2349        /* fall through */
2350    case 6:
2351    case 11:
2352        if (!(s->prefix & PREFIX_VEX)) {
2353            goto illegal;
2354        }
2355        if (!(s->flags & HF_AVX_EN_MASK)) {
2356            goto illegal;
2357        }
2358        break;
2359    case 8:
2360        /* Non-VEX case handled in decode_0F77.  */
2361        assert(s->prefix & PREFIX_VEX);
2362        if (!(s->flags & HF_AVX_EN_MASK)) {
2363            goto illegal;
2364        }
2365        break;
2366    case 13:
2367        if (!(s->prefix & PREFIX_VEX)) {
2368            goto illegal;
2369        }
2370        if (s->vex_l) {
2371            goto illegal;
2372        }
2373        /* All integer instructions use VEX.vvvv, so exit.  */
2374        return true;
2375    }
2376
2377    if (s->vex_v != 0 &&
2378        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
2379        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
2380        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
2381        goto illegal;
2382    }
2383
2384    if (s->flags & HF_TS_MASK) {
2385        goto nm_exception;
2386    }
2387    if (s->flags & HF_EM_MASK) {
2388        goto illegal;
2389    }
2390
2391    if (e->check) {
2392        if (e->check & X86_CHECK_VEX128) {
2393            if (s->vex_l) {
2394                goto illegal;
2395            }
2396        }
2397        if (e->check & X86_CHECK_W0) {
2398            if (s->vex_w) {
2399                goto illegal;
2400            }
2401        }
2402        if (e->check & X86_CHECK_W1) {
2403            if (!s->vex_w) {
2404                goto illegal;
2405            }
2406        }
2407    }
2408    return true;
2409
2410nm_exception:
2411    gen_NM_exception(s);
2412    return false;
2413illegal:
2414    gen_illegal_opcode(s);
2415    return false;
2416}
2417
2418/*
2419 * Convert one instruction. s->base.is_jmp is set if the translation must
2420 * be stopped.
2421 */
2422static void disas_insn(DisasContext *s, CPUState *cpu)
2423{
2424    CPUX86State *env = cpu_env(cpu);
2425    X86DecodedInsn decode;
2426    X86DecodeFunc decode_func = decode_root;
2427    uint8_t cc_live, b;
2428
2429    s->pc = s->base.pc_next;
2430    s->override = -1;
2431    s->popl_esp_hack = 0;
2432#ifdef TARGET_X86_64
2433    s->rex_r = 0;
2434    s->rex_x = 0;
2435    s->rex_b = 0;
2436#endif
2437    s->rip_offset = 0; /* for relative ip address */
2438    s->vex_l = 0;
2439    s->vex_v = 0;
2440    s->vex_w = false;
2441    s->has_modrm = false;
2442    s->prefix = 0;
2443
2444 next_byte:
2445    b = x86_ldub_code(env, s);
2446
2447    /* Collect prefixes.  */
2448    switch (b) {
2449    case 0xf3:
2450        s->prefix |= PREFIX_REPZ;
2451        s->prefix &= ~PREFIX_REPNZ;
2452        goto next_byte;
2453    case 0xf2:
2454        s->prefix |= PREFIX_REPNZ;
2455        s->prefix &= ~PREFIX_REPZ;
2456        goto next_byte;
2457    case 0xf0:
2458        s->prefix |= PREFIX_LOCK;
2459        goto next_byte;
2460    case 0x2e:
2461        s->override = R_CS;
2462        goto next_byte;
2463    case 0x36:
2464        s->override = R_SS;
2465        goto next_byte;
2466    case 0x3e:
2467        s->override = R_DS;
2468        goto next_byte;
2469    case 0x26:
2470        s->override = R_ES;
2471        goto next_byte;
2472    case 0x64:
2473        s->override = R_FS;
2474        goto next_byte;
2475    case 0x65:
2476        s->override = R_GS;
2477        goto next_byte;
2478    case 0x66:
2479        s->prefix |= PREFIX_DATA;
2480        goto next_byte;
2481    case 0x67:
2482        s->prefix |= PREFIX_ADR;
2483        goto next_byte;
2484#ifdef TARGET_X86_64
2485    case 0x40 ... 0x4f:
2486        if (CODE64(s)) {
2487            /* REX prefix */
2488            s->prefix |= PREFIX_REX;
2489            s->vex_w = (b >> 3) & 1;
2490            s->rex_r = (b & 0x4) << 1;
2491            s->rex_x = (b & 0x2) << 2;
2492            s->rex_b = (b & 0x1) << 3;
2493            goto next_byte;
2494        }
2495        break;
2496#endif
2497    case 0xc5: /* 2-byte VEX */
2498    case 0xc4: /* 3-byte VEX */
2499        /*
2500         * VEX prefixes cannot be used except in 32-bit mode.
2501         * Otherwise the instruction is LES or LDS.
2502         */
2503        if (CODE32(s) && !VM86(s)) {
2504            static const int pp_prefix[4] = {
2505                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
2506            };
2507            int vex3, vex2 = x86_ldub_code(env, s);
2508
2509            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
2510                /*
2511                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
2512                 * otherwise the instruction is LES or LDS.
2513                 */
2514                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
2515                break;
2516            }
2517
2518            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
2519            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
2520                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
2521                goto illegal_op;
2522            }
2523#ifdef TARGET_X86_64
2524            s->rex_r = (~vex2 >> 4) & 8;
2525#endif
2526            if (b == 0xc5) {
2527                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
2528                vex3 = vex2;
2529                decode_func = decode_0F;
2530            } else {
2531                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
2532                vex3 = x86_ldub_code(env, s);
2533#ifdef TARGET_X86_64
2534                s->rex_x = (~vex2 >> 3) & 8;
2535                s->rex_b = (~vex2 >> 2) & 8;
2536#endif
2537                s->vex_w = (vex3 >> 7) & 1;
2538                switch (vex2 & 0x1f) {
2539                case 0x01: /* Implied 0f leading opcode bytes.  */
2540                    decode_func = decode_0F;
2541                    break;
2542                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
2543                    decode_func = decode_0F38;
2544                    break;
2545                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
2546                    decode_func = decode_0F3A;
2547                    break;
2548                default:   /* Reserved for future use.  */
2549                    goto unknown_op;
2550                }
2551            }
2552            s->vex_v = (~vex3 >> 3) & 0xf;
2553            s->vex_l = (vex3 >> 2) & 1;
2554            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
2555        }
2556        break;
2557    default:
2558        break;
2559    }
2560
2561    /* Post-process prefixes.  */
2562    if (CODE64(s)) {
2563        /*
2564         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
2565         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
2566         * over 0x66 if both are present.
2567         */
2568        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
2569        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
2570        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
2571    } else {
2572        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
2573        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
2574            s->dflag = MO_32;
2575        } else {
2576            s->dflag = MO_16;
2577        }
2578        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
2579        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
2580            s->aflag = MO_32;
2581        }  else {
2582            s->aflag = MO_16;
2583        }
2584    }
2585
2586    /* Go back to old decoder for unconverted opcodes.  */
2587    if (!(s->prefix & PREFIX_VEX)) {
2588        if ((b & ~7) == 0xd8) {
2589            if (!disas_insn_x87(s, cpu, b)) {
2590                goto unknown_op;
2591            }
2592            return;
2593        }
2594
2595        if (b == 0x0f) {
2596            b = x86_ldub_code(env, s);
2597            switch (b) {
2598            case 0x00 ... 0x01: /* mostly privileged instructions */
2599            case 0x1a ... 0x1b: /* MPX */
2600            case 0xa3:          /* bt */
2601            case 0xab:          /* bts */
2602            case 0xb3:          /* btr */
2603            case 0xba ... 0xbb: /* grp8, btc */
2604            case 0xc7:          /* grp9 */
2605                disas_insn_old(s, cpu, b + 0x100);
2606                return;
2607            default:
2608                decode_func = do_decode_0F;
2609                break;
2610            }
2611        }
2612    }
2613
2614    memset(&decode, 0, sizeof(decode));
2615    decode.cc_op = -1;
2616    decode.b = b;
2617    if (!decode_insn(s, env, decode_func, &decode)) {
2618        goto illegal_op;
2619    }
2620    if (!decode.e.gen) {
2621        goto unknown_op;
2622    }
2623
2624    if (!has_cpuid_feature(s, decode.e.cpuid)) {
2625        goto illegal_op;
2626    }
2627
2628    /* Checks that result in #UD come first.  */
2629    if (decode.e.check) {
2630        if (CODE64(s)) {
2631            if (decode.e.check & X86_CHECK_i64) {
2632                goto illegal_op;
2633            }
2634            if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
2635                goto illegal_op;
2636            }
2637        } else {
2638            if (decode.e.check & X86_CHECK_o64) {
2639                goto illegal_op;
2640            }
2641            if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
2642                goto illegal_op;
2643            }
2644        }
2645        if (decode.e.check & X86_CHECK_prot_or_vm86) {
2646            if (!PE(s)) {
2647                goto illegal_op;
2648            }
2649        }
2650        if (decode.e.check & X86_CHECK_no_vm86) {
2651            if (VM86(s)) {
2652                goto illegal_op;
2653            }
2654        }
2655    }
2656
2657    switch (decode.e.special) {
2658    case X86_SPECIAL_None:
2659        break;
2660
2661    case X86_SPECIAL_Locked:
2662        if (decode.op[0].has_ea) {
2663            s->prefix |= PREFIX_LOCK;
2664        }
2665        decode.e.special = X86_SPECIAL_HasLock;
2666        /* fallthrough */
2667    case X86_SPECIAL_HasLock:
2668        break;
2669
2670    case X86_SPECIAL_Op0_Rd:
2671        assert(decode.op[0].unit == X86_OP_INT);
2672        if (!decode.op[0].has_ea) {
2673            decode.op[0].ot = MO_32;
2674        }
2675        break;
2676
2677    case X86_SPECIAL_Op2_Ry:
2678        assert(decode.op[2].unit == X86_OP_INT);
2679        if (!decode.op[2].has_ea) {
2680            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
2681        }
2682        break;
2683
2684    case X86_SPECIAL_AVXExtMov:
2685        if (!decode.op[2].has_ea) {
2686            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
2687        } else if (s->vex_l) {
2688            decode.op[2].ot++;
2689        }
2690        break;
2691
2692    case X86_SPECIAL_SExtT0:
2693    case X86_SPECIAL_ZExtT0:
2694        /* Handled in gen_load.  */
2695        assert(decode.op[1].unit == X86_OP_INT);
2696        break;
2697
2698    case X86_SPECIAL_Op0_Mw:
2699        assert(decode.op[0].unit == X86_OP_INT);
2700        if (decode.op[0].has_ea) {
2701            decode.op[0].ot = MO_16;
2702        }
2703        break;
2704
2705    default:
2706        break;
2707    }
2708
2709    if (s->prefix & PREFIX_LOCK) {
2710        if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
2711            goto illegal_op;
2712        }
2713    }
2714
2715    if (!validate_vex(s, &decode)) {
2716        return;
2717    }
2718
2719    /*
2720     * Checks that result in #GP or VMEXIT come second.  Intercepts are
2721     * generally checked after non-memory exceptions (i.e. after all
2722     * exceptions if there is no memory operand).  Exceptions are
2723     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
2724     *
2725     * XSETBV will check for CPL0 in the gen_* function instead of using chk().
2726     */
2727    if (decode.e.check & X86_CHECK_cpl0) {
2728        if (CPL(s) != 0) {
2729            goto gp_fault;
2730        }
2731    }
2732    if (decode.e.has_intercept && unlikely(GUEST(s))) {
2733        gen_helper_svm_check_intercept(tcg_env,
2734                                       tcg_constant_i32(decode.e.intercept));
2735    }
2736    if (decode.e.check) {
2737        if ((decode.e.check & X86_CHECK_smm) && !(s->flags & HF_SMM_MASK)) {
2738            goto illegal_op;
2739        }
2740        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
2741            if (IOPL(s) < 3) {
2742                goto gp_fault;
2743            }
2744        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
2745            if (IOPL(s) < CPL(s)) {
2746                goto gp_fault;
2747            }
2748        }
2749    }
2750
2751    if (decode.e.special == X86_SPECIAL_MMX &&
2752        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2753        gen_helper_enter_mmx(tcg_env);
2754    }
2755
2756    if (decode.e.special != X86_SPECIAL_NoLoadEA &&
2757        (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
2758        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
2759    }
2760    if (s->prefix & PREFIX_LOCK) {
2761        gen_load(s, &decode, 2, s->T1);
2762        decode.e.gen(s, &decode);
2763    } else {
2764        if (decode.op[0].unit == X86_OP_MMX) {
2765            compute_mmx_offset(&decode.op[0]);
2766        } else if (decode.op[0].unit == X86_OP_SSE) {
2767            compute_xmm_offset(&decode.op[0]);
2768        }
2769        gen_load(s, &decode, 1, s->T0);
2770        gen_load(s, &decode, 2, s->T1);
2771        decode.e.gen(s, &decode);
2772        gen_writeback(s, &decode, 0, s->T0);
2773    }
2774
2775    /*
2776     * Write back flags after last memory access.  Some older ALU instructions, as
2777     * well as SSE instructions, write flags in the gen_* function, but that can
2778     * cause incorrect tracking of CC_OP for instructions that write to both memory
2779     * and flags.
2780     */
2781    if (decode.cc_op != -1) {
2782        if (decode.cc_dst) {
2783            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
2784        }
2785        if (decode.cc_src) {
2786            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
2787        }
2788        if (decode.cc_src2) {
2789            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
2790        }
2791        if (decode.cc_op == CC_OP_DYNAMIC) {
2792            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
2793        }
2794        set_cc_op(s, decode.cc_op);
2795        cc_live = cc_op_live[decode.cc_op];
2796    } else {
2797        cc_live = 0;
2798    }
2799    if (decode.cc_op != CC_OP_DYNAMIC) {
2800        assert(!decode.cc_op_dynamic);
2801        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
2802        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
2803        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
2804    }
2805
2806    return;
2807 gp_fault:
2808    gen_exception_gpf(s);
2809    return;
2810 illegal_op:
2811    gen_illegal_opcode(s);
2812    return;
2813 unknown_op:
2814    gen_unknown_opcode(env, s);
2815}
2816