/*
 * New-style decoder for i386 instructions
 *
 *  Copyright (c) 2022 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The decoder is mostly based on tables copied from the Intel SDM.  As
 * a result, most operand load and writeback is done entirely in common
 * table-driven code using the same operand type (X86_TYPE_*) and
 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
 * though.
 *
 * Operand sizes
 * -------------
 *
 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
 *
 * The manual lists immediate far destinations as Ap (technically an implicit
 * argument).  The decoder splits them into two immediates, using "Ip" for
 * the offset part (that comes first in the instruction stream) and "Iw" for
 * the segment/selector part.  The size of the offset is given by s->dflag
 * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
 * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
 *
 * Operand types
 * -------------
 *
 * For memory-only operands, if the emitter function wants to rely on
 * generic load and writeback, the decoder needs to know the type of the
 * operand.  Therefore, M is often replaced by the more specific EM and WM
 * (respectively selecting an ALU operand, like the operand type E, or a
 * vector operand like the operand type W).
 *
 * Immediates are almost always signed or masked away in helpers.  Two
 * common exceptions are IN/OUT and absolute jumps.  For these, there is
 * an additional custom operand type "I_unsigned".  Alternatively, the
 * mask could be applied (and the original sign-extended value would be
 * optimized away by TCG) in the emitter function.
 *
 * Finally, a "nop" operand type is used for multi-byte NOPs.  It accepts
 * any value of mod including 11b (unlike M) but it does not try to
 * interpret the operand (like M).
 *
 * Vector operands
 * ---------------
 *
 * The main difference is that the V, U and W types are extended to
 * cover MMX as well; if an instruction is like
 *
 *      por   Pq, Qq
 *  66  por   Vx, Hx, Wx
 *
 * only the second row is included and the instruction is marked as a
 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
 * "x" to "q" if there is no prefix.
 *
 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
 * if the difference is expressed via prefixes.  Individual instructions
 * are separated by prefix in the generator functions.
 *
 * There is a custom size "xh" used to address half of a SSE/AVX operand.
 * This points to a 64-bit operand for SSE operations, 128-bit operand
 * for 256-bit AVX operands, etc.  It is used for conversion operations
 * such as VCVTPH2PS or VCVTSS2SD.
 *
 * There are a couple cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
 *
 * VEX exception classes
 * ---------------------
 *
 * Speaking of imprecisions in the manual, the decoder treats all
 * exception-class 4 instructions as having an optional VEX prefix, and
 * all exception-class 6 instructions as having a mandatory VEX prefix.
 * This is true except for a dozen instructions; these are in exception
 * class 4 but do not ignore the VEX.W bit (which does not even exist
 * without a VEX prefix).  These instructions are mostly listed in Intel's
 * table 2-16, but with a few exceptions.
 *
 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
 * they list the VEX.W requirements in the exception classes as well (except
 * when they don't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still uses 4 as the
 * primary exception class... except when they don't.
 *
 * The summary is:
 *                       Intel     AMD         VEX.W           note
 * -------------------------------------------------------------------
 * vpblendd              4         4J          0
 * vpblendvb             4         4E-X        0               (*)
 * vpbroadcastq          6         6D          0               (+)
 * vpermd/vpermps        4         4H          0               (§)
 * vpermq/vpermpd        4         4H-1        1               (§)
 * vpermilpd/vpermilps   4         6E          0               (^)
 * vpmaskmovd            6         4K          significant     (^)
 * vpsllv                4         4K          significant
 * vpsrav                4         4J          0
 * vpsrlv                4         4K          significant
 * vtestps/vtestpd       4         4G          0
 *
 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
 *         explain why it is considered exception class 4.  However,
 *         Intel says that VEX-only instructions should be in class 6...
 *
 *    (+)  Not found in Intel's table 2-16
 *
 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
 *         however present in the description of the instruction
 *
 *    (^)  these are the two cases in which Intel and AMD disagree on the
 *         primary exception class
 *
 * Instructions still in translate.c
 * ---------------------------------
 * Generation of TCG opcodes for almost all instructions is in emit.c.inc;
 * this file interprets the prefixes and opcode bytes down to individual
 * instruction mnemonics.  There is only a handful of opcodes still using
 * a switch statement to decode modrm bits 3-5 and prefixes after decoding
 * is complete; these are relics of the older x86 decoder and their code
 * generation is performed in translate.c.
 *
 * These unconverted opcodes also perform their own effective address
 * generation using the gen_lea_modrm() function.
 *
 * There is nothing particularly complicated about them; simply, they don't
 * need any nasty hacks in the decoder, and they shouldn't get in the way
 * of the implementation of new x86 instructions, so they are left alone
 * for the time being.
 *
 * x87:
 * 0xD8 - 0xDF
 *
 * privileged/system:
 * 0x0F 0x00               group 6 (SLDT, STR, LLDT, LTR, VERR, VERW)
 * 0x0F 0x01               group 7 (SGDT, SIDT, LGDT, LIDT, SMSW, LMSW, INVLPG,
 *                                  MONITOR, MWAIT, CLAC, STAC, XGETBV, XSETBV,
 *                                  SWAPGS, RDTSCP)
 * 0x0F 0xC7 (reg operand) group 9 (RDRAND, RDSEED, RDPID)
 *
 * MPX:
 * 0x0F 0x1A               BNDLDX, BNDMOV, BNDCL, BNDCU
 * 0x0F 0x1B               BNDSTX, BNDMOV, BNDMK, BNDCN
 */

#define X86_OP_NONE { 0 },

#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

#define X86_OP_GROUP1(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_GROUPwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_GROUP3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

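/*
 * For illustration, an entry written with these macros, such as the PAND
 * entry in the opcodes_0F table below,
 *
 *     X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66)
 *
 * expands to the designated initializer
 *
 *     { .gen = gen_PAND,
 *       .op0 = X86_TYPE_V, .s0 = X86_SIZE_x,
 *       .op1 = X86_TYPE_H, .s1 = X86_SIZE_x,
 *       .op2 = X86_TYPE_W, .s2 = X86_SIZE_x,
 *       .vex_class = 4, .valid_prefix = P_00 | P_66, }
 *
 * The X86_OP_GROUP* variants fill in .decode instead of .gen and set
 * .is_decode, deferring the choice of generator to a decode_* callback.
 */
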
/*
 * Short forms that are mostly useful for ALU opcodes and other
 * one-byte opcodes.  For vector instructions it is usually
 * clearer to write all three operands explicitly, because the
 * corresponding gen_* function will use OP_PTRn rather than s->T0
 * and s->T1.
 */
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, op0, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY1(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define cpuid(feat) .cpuid = X86_FEAT_##feat,
#define nolea .special = X86_SPECIAL_NoLoadEA,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define op0_Mw .special = X86_SPECIAL_Op0_Mw,
#define btEvGv .special = X86_SPECIAL_BitTest,

#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

#define chk(a) .check = X86_CHECK_##a,
#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b,
#define chk3(a, b, c) .check = X86_CHECK_##a | X86_CHECK_##b | X86_CHECK_##c,
#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true,

#define avx2_256 .vex_special = X86_VEX_AVX2_256,

#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,

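/*
 * A minimal sketch of how a table entry's valid_prefix mask is meant to
 * be consumed (the actual check lives elsewhere in the decoder;
 * "has_valid_prefix" is a hypothetical name used only for illustration):
 *
 *     static bool has_valid_prefix(DisasContext *s, const X86OpEntry *e)
 *     {
 *         unsigned w;
 *         if (s->prefix & PREFIX_REPNZ) {
 *             w = P_F2;
 *         } else if (s->prefix & PREFIX_REPZ) {
 *             w = P_F3;
 *         } else if (s->prefix & PREFIX_DATA) {
 *             w = P_66;
 *         } else {
 *             w = P_00;
 *         }
 *         return (e->valid_prefix & w) != 0;
 *     }
 *
 * The priority order (F2 over F3 over 66) matches decode_by_prefix()
 * below.
 */
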
#define UNKNOWN_OPCODE ((X86OpEntry) {})

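/*
 * Fetch the ModRM byte once and cache it, so that the group decoders
 * below can inspect it repeatedly without consuming further bytes from
 * the instruction stream.
 */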
static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
{
    if (!s->has_modrm) {
        s->modrm = x86_ldub_code(env, s);
        s->has_modrm = true;
    }
    return s->modrm;
}

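/*
 * Pick one of up to four entries based on the mandatory prefix:
 * index 0 for no prefix, 1 for 66, 2 for F3 and 3 for F2, with F2
 * taking priority over F3, and F3 over 66.
 */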
static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
{
    if (s->prefix & PREFIX_REPNZ) {
        return &entries[3];
    } else if (s->prefix & PREFIX_REPZ) {
        return &entries[2];
    } else if (s->prefix & PREFIX_DATA) {
        return &entries[1];
    } else {
        return &entries[0];
    }
}

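/*
 * Group 8 (0F BA): immediate-count bit tests.  The reg field selects
 * BT/BTS/BTR/BTC for values 4-7; values 0-3 leave the generator NULL.
 */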
static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86GenFunc group8_gen[8] = {
        NULL, NULL, NULL, NULL,
        gen_BT, gen_BTS, gen_BTR, gen_BTC,
    };
    int op = (get_modrm(s, env) >> 3) & 7;
    entry->gen = group8_gen[op];
    if (op == 4) {
        /* prevent writeback and LOCK for BT */
        entry->op1 = entry->op0;
        entry->op0 = X86_TYPE_None;
        entry->s0 = X86_SIZE_None;
    } else {
        entry->special = X86_SPECIAL_HasLock;
    }
}

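/*
 * Group 9 (0F C7): a memory operand with reg field 1 is CMPXCHG8B or,
 * with REX.W, CMPXCHG16B; the register forms (RDRAND, RDSEED, RDPID)
 * are still decoded in translate.c via multi0F.
 */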
static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry group9_reg =
        X86_OP_ENTRY0(multi0F);  /* unconverted */
    static const X86OpEntry cmpxchg8b =
        X86_OP_ENTRY1(CMPXCHG8B,  M,q,  lock p_00 cpuid(CX8));
    static const X86OpEntry cmpxchg16b =
        X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16));

    int modrm = get_modrm(s, env);
    int op = (modrm >> 3) & 7;

    if ((modrm >> 6) == 3) {
        *entry = group9_reg;
    } else if (op == 1) {
        *entry = REX_W(s) ? cmpxchg16b : cmpxchg8b;
    } else {
        *entry = UNKNOWN_OPCODE;
    }
}

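/*
 * Group 15 (0F AE): the register forms encode fences and the FSGSBASE
 * instructions, while the memory forms encode FXSAVE/FXRSTOR, the XSAVE
 * family, LDMXCSR/STMXCSR, and the cache-line flush/writeback hints
 * (the latter decoded as NOPs guarded by their CPUID feature bits).
 */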
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry group15_reg[8] = {
        [0] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
        [1] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
        [2] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
        [3] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
        [5] = X86_OP_ENTRY0(LFENCE,          cpuid(SSE) p_00),
        [6] = X86_OP_ENTRY0(MFENCE,          cpuid(SSE2) p_00),
        [7] = X86_OP_ENTRY0(SFENCE,          cpuid(SSE) p_00),
    };

    static const X86OpEntry group15_mem[8] = {
        [0] = X86_OP_ENTRYw(FXSAVE,     M,y, cpuid(FXSR) p_00),
        [1] = X86_OP_ENTRYr(FXRSTOR,    M,y, cpuid(FXSR) p_00),
        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128) p_00),
        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128) p_00),
        [4] = X86_OP_ENTRYw(XSAVE,      M,y, cpuid(XSAVE) p_00),
        [5] = X86_OP_ENTRYr(XRSTOR,     M,y, cpuid(XSAVE) p_00),
        [6] = X86_OP_ENTRYw(XSAVEOPT,   M,b, cpuid(XSAVEOPT) p_00),
        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSH) p_00),
    };

    static const X86OpEntry group15_mem_66[8] = {
        [6] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLWB)),
        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSHOPT)),
    };

    uint8_t modrm = get_modrm(s, env);
    int op = (modrm >> 3) & 7;

    if ((modrm >> 6) == 3) {
        *entry = group15_reg[op];
    } else if (s->prefix & PREFIX_DATA) {
        *entry = group15_mem_66[op];
    } else {
        *entry = group15_mem[op];
    }
}

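/*
 * Group 17 (VEX-encoded 0F 38 F3, reached through the tables below):
 * the reg field selects BLSR (1), BLSMSK (2) or BLSI (3); all other
 * values leave the generator NULL.
 */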
static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86GenFunc group17_gen[8] = {
        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
    };
    int op = (get_modrm(s, env) >> 3) & 7;
    entry->gen = group17_gen[op];
}

static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group12[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group12[op];
}

static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group13[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group13[op];
}

static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group14[8] = {
        /* grp14 */
        {},
        {},
        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
        {},
        {},
        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group14[op];
}

static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F6F[4] = {
        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F6F);
}

static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry pshufw[4] = {
        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
    };

    *entry = *decode_by_prefix(s, pshufw);
}

static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & PREFIX_VEX)) {
        entry->gen = gen_EMMS;
    } else if (!s->vex_l) {
        entry->gen = gen_VZEROUPPER;
        entry->vex_class = 8;
    } else {
        entry->gen = gen_VZEROALL;
        entry->vex_class = 8;
    }
}

static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F78[4] = {
        {},
        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
        {},
        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
    };
    *entry = *decode_by_prefix(s, opcodes_0F78);
}

static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (s->prefix & PREFIX_REPNZ) {
        entry->gen = gen_INSERTQ_r; /* AMD extension */
    } else if (s->prefix & PREFIX_DATA) {
        entry->gen = gen_EXTRQ_r; /* AMD extension */
    } else {
        entry->gen = NULL;
    }
}

static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7E[4] = {
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7E);
}

static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7F[4] = {
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7F);
}

static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry popcnt =
        X86_OP_ENTRYwr(POPCNT,    G,v, E,v,  cpuid(POPCNT) zextT0);

    if (s->prefix & PREFIX_REPZ) {
        *entry = popcnt;
    } else {
        memset(entry, 0, sizeof(*entry));
    }
}

static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSF, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBC[4] = {
        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(TZCNT, G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };
    /* TZCNT is a BMI1 instruction; without BMI1, F3 0F BC is plain BSF.  */
    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
        *entry = opcodes_0FBC[0];
    } else {
        *entry = *decode_by_prefix(s, opcodes_0FBC);
    }
}

static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSR, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBD[4] = {
        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(LZCNT, G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };
    /* LZCNT is an ABM instruction; without ABM, F3 0F BD is plain BSR.  */
    if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
        *entry = opcodes_0FBD[0];
    } else {
        *entry = *decode_by_prefix(s, opcodes_0FBD);
    }
}

static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry movq[4] = {
        {},
        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
    };

    *entry = *decode_by_prefix(s, movq);
}

static const X86OpEntry opcodes_0F38_00toEF[240] = {
    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),

    /*
     * Source operand listed as Mq/Ux and similar in the manual; incorrectly
     * listed as 128-bit only in table 2-17.
     */
    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),

    /* Same as PMOVSX.  */
    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),

    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),

    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */

    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    /* Listed incorrectly as type 4 */
    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    /* Incorrectly listed as Mx,Hx,Vx in the manual */
    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),

    /* VPBROADCASTQ not listed as W0 in table 2-16 */
    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),

    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),

    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),

    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),

    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),

    /*
     * REG selects srcdest2 operand, VEX.vvvv selects src3.  VEX class not found
     * in manual, assumed to be 13 from the VEX.L0 constraint.
     */
    [0xe0] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe1] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe2] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe3] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe4] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe5] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe6] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe7] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),

    [0xe8] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe9] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xea] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xeb] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xec] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xed] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xee] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xef] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
};

/* five rows for no prefix, 66, F3, F2, 66+F2  */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
    [0] = {
        X86_OP_ENTRYwr(MOVBE, G,y, M,y, cpuid(MOVBE)),
        X86_OP_ENTRYwr(MOVBE, G,w, M,w, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
    },
    [1] = {
        X86_OP_ENTRYwr(MOVBE, M,y, G,y, cpuid(MOVBE)),
        X86_OP_ENTRYwr(MOVBE, M,w, G,w, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
    },
    [2] = {
        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [3] = {
        X86_OP_GROUP3(group17, B,y, None,None, E,y, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [5] = {
        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
        {},
        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
    [6] = {
        {},
        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
        {},
    },
    [7] = {
        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
        /* SHLX/SARX/SHRX are BMI2, not BMI1, instructions */
        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
};

static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    *b = x86_ldub_code(env, s);
    if (*b < 0xf0) {
        *entry = opcodes_0F38_00toEF[*b];
    } else {
        int row = 0;
        if (s->prefix & PREFIX_REPZ) {
            /* The REPZ (F3) prefix has priority over 66 */
            row = 2;
        } else {
            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
            row += s->prefix & PREFIX_DATA ? 1 : 0;
        }
        *entry = opcodes_0F38_F0toFF[*b & 15][row];
    }
}
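
/*
 * Worked example for decode_0F38 above: 66 F2 0F 38 F1 selects
 * opcodes_0F38_F0toFF[1][4] (row 3 for F2 plus 1 for 66), i.e.
 * CRC32 with a 16-bit source operand.
 */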

static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry
        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);

    int modrm = get_modrm(s, env);
    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
}

static const X86OpEntry opcodes_0F3A[256] = {
    /*
     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
     * only.
     */
    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),

    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),

    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) op2_Ry p_66),
    [0x21] = X86_OP_GROUP0(VINSERTPS),
    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),

    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66), /* dpps */
    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66), /* dppd */
    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),

    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),

    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    /*
     * Not listed as four operand in the manual.  Also writes and reads 128-bits
     * from the first two operands due to the V operand picking higher entries of
     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
     * value of vex_special, because the table lists the operand types of VSQRTPx.
     */
    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66),
    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66),
    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),

    /* Listed incorrectly as type 4 */
    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),

    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),

    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),

    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};
900
901static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
902{
903    *b = x86_ldub_code(env, s);
904    *entry = opcodes_0F3A[*b];
905}
906
907/*
908 * There are some mistakes in the operands in the manual, and the load/store/register
909 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
910 * efficiency of implementation rather than copying what the manual says.
911 *
912 * In particular:
913 *
914 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
915 * but this is not mentioned in the tables.
916 *
917 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
918 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
919 * quadword of the V operand.
920 */
static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F10_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F10_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
    }
}

static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F11_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F11_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
    }
}

static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F12_mem[4] = {
        /*
         * Use dq for operand for compatibility with gen_MOVSD and
         * to allow VEX128 only.
         */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
    };
    static const X86OpEntry opcodes_0F12_reg[4] = {
        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
            entry->s2 = X86_SIZE_qq;
        }
    }
}

static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F16_mem[4] = {
        /*
         * Operand 1 technically only reads the low 64 bits, but uses dq so that
         * it is easier to check for op0 == op1 in an endianness-neutral manner.
         */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        {},
    };
    static const X86OpEntry opcodes_0F16_reg[4] = {
        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
        {},
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
    }
}

static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2A[4] = {
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2A);
}

static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2B[4] = {
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
        /* AMD extensions */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
    };

    *entry = *decode_by_prefix(s, opcodes_0F2B);
}

static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2C[4] = {
        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2C);
}

static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2D[4] = {
        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2D);
}

static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /*
     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
     */
    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
}

static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
        entry->op1 = X86_TYPE_None;
        entry->s1 = X86_SIZE_None;
    }
    switch (*b) {
    case 0x51: entry->gen = gen_VSQRT; break;
    case 0x52: entry->gen = gen_VRSQRT; break;
    case 0x53: entry->gen = gen_VRCP; break;
    }
}

static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5A[4] = {
        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
    };
    *entry = *decode_by_prefix(s, opcodes_0F5A);
}

static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5B[4] = {
        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F5B);
}

static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0FE6[4] = {
        {},
        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
    };
    *entry = *decode_by_prefix(s, opcodes_0FE6);
}

/*
 * MOV from/to CR and DR ignores the mod bits (it behaves as if
 * (modrm & 0xc0) == 0xc0), so the pre-decode tweak for all of these
 * instructions is grouped here.
 *
 * AMD documentation (24594.pdf) and testing of Intel 386 and 486
 * processors all show that the mod bits are assumed to be 1's,
 * regardless of actual values.
 */
static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    get_modrm(s, env);
    s->modrm |= 0xC0;

    entry->gen = gen_MOV;
}

static const X86OpEntry opcodes_0F[256] = {
    [0x00] = X86_OP_ENTRY1(multi0F,     nop,v,                nolea), /* unconverted */
    [0x01] = X86_OP_ENTRY1(multi0F,     nop,v,                nolea), /* unconverted */
    [0x02] = X86_OP_ENTRYwr(LAR,        G,v, E,w,             chk(prot)),
    [0x03] = X86_OP_ENTRYwr(LSL,        G,v, E,w,             chk(prot)),
    [0x05] = X86_OP_ENTRY0(SYSCALL,                           chk(o64_intel)),
    [0x06] = X86_OP_ENTRY0(CLTS,                              chk(cpl0) svm(WRITE_CR0)),
    [0x07] = X86_OP_ENTRY0(SYSRET,                            chk3(o64_intel, prot, cpl0)),

    [0x10] = X86_OP_GROUP0(0F10),
    [0x11] = X86_OP_GROUP0(0F11),
    [0x12] = X86_OP_GROUP0(0F12),
    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x16] = X86_OP_GROUP0(0F16),
    /* Incorrectly listed as Mq,Vq in the manual */
    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),

    /*
     * Incorrectly listed as using "d" operand type in the manual.  In reality
     * there's no 16-bit version (like y) and it does not use REX.W (like d64).
     */
    [0x20] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, C,y_d64, chk(cpl0) svm(READ_CR0)),
    [0x21] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, D,y_d64, chk(cpl0) svm(READ_DR0)),
    [0x22] = X86_OP_GROUPwr(MOV_CR_DR,   C,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_CR0)),
    [0x23] = X86_OP_GROUPwr(MOV_CR_DR,   D,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_DR0)),

    [0x30] = X86_OP_ENTRY0(WRMSR,                             chk(cpl0)),
    [0x31] = X86_OP_ENTRY0(RDTSC),
    [0x32] = X86_OP_ENTRY0(RDMSR,                             chk(cpl0)),
    [0x33] = X86_OP_ENTRY0(RDPMC),
    [0x34] = X86_OP_ENTRY0(SYSENTER,                          chk2(i64_amd, prot_or_vm86)),
    [0x35] = X86_OP_ENTRY0(SYSEXIT,                           chk3(i64_amd, prot, cpl0)),

    [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x43] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x44] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x45] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x46] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x47] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),

    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */

    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1207
1208    [0x70] = X86_OP_GROUP0(0F70),
1209    [0x71] = X86_OP_GROUP0(group12),
1210    [0x72] = X86_OP_GROUP0(group13),
1211    [0x73] = X86_OP_GROUP0(group14),
1212    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1213    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1214    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1215    [0x77] = X86_OP_GROUP0(0F77),
1216
1217    [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
1218    [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
1219    [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
1220    [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
1221    [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
1222    [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
1223    [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
1224    [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),
1225
1226    [0x90] = X86_OP_ENTRYw(SETcc, E,b),
1227    [0x91] = X86_OP_ENTRYw(SETcc, E,b),
1228    [0x92] = X86_OP_ENTRYw(SETcc, E,b),
1229    [0x93] = X86_OP_ENTRYw(SETcc, E,b),
1230    [0x94] = X86_OP_ENTRYw(SETcc, E,b),
1231    [0x95] = X86_OP_ENTRYw(SETcc, E,b),
1232    [0x96] = X86_OP_ENTRYw(SETcc, E,b),
1233    [0x97] = X86_OP_ENTRYw(SETcc, E,b),
1234
1235    [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
1236    [0xa1] = X86_OP_ENTRYw(POP, FS, w),
1237    [0xa2] = X86_OP_ENTRY0(CPUID),
1238    [0xa3] = X86_OP_ENTRYrr(BT,   E,v, G,v,          btEvGv),
1239    [0xa4] = X86_OP_ENTRY4(SHLD,  E,v, 2op,v, G,v),
1240    [0xa5] = X86_OP_ENTRY3(SHLD,  E,v, 2op,v, G,v),
1241
1242    [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
1243    [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
1244    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
1245    [0xb3] = X86_OP_ENTRY2(BTR,    E,v, G,v,             btEvGv),
1246    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
1247    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
1248    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
1249    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
1250
1251    [0xc0] = X86_OP_ENTRY2(XADD,       E,b, G,b,            lock),
1252    [0xc1] = X86_OP_ENTRY2(XADD,       E,v, G,v,            lock),
1253    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
1254    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
1255    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
1256    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
1257    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
1258    [0xc7] = X86_OP_GROUP0(group9),
1259
1260    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
1261    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1262    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1263    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1264    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1265    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1266    [0xd6] = X86_OP_GROUP0(0FD6),
1267    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1268
1269    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1270    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1271    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1272    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1273    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1274    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1275    [0xe6] = X86_OP_GROUP0(0FE6),
1276    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1277
1278    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1279    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1280    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1281    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1282    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1283    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1284    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1285    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1286
1287    [0x08] = X86_OP_ENTRY0(NOP,           svm(INVD)),   /* INVD; caches are not modeled */
1288    [0x09] = X86_OP_ENTRY0(NOP,           svm(WBINVD)), /* WBINVD; caches are not modeled */
1289    [0x0b] = X86_OP_ENTRY0(UD),           /* UD2 */
1290    [0x0d] = X86_OP_ENTRY1(NOP,  M,v),    /* 3DNow! prefetch */
1291    [0x0e] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
1292    /*
1293     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
1294     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
1295     * function.
1296     */
1297    [0x0f] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
1298
1299    [0x18] = X86_OP_ENTRY1(NOP,  nop,v),  /* prefetch/reserved NOP */
1300    [0x19] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1301    [0x1a] = X86_OP_ENTRY1(multi0F, nop,v, nolea),  /* unconverted MPX */
1302    [0x1b] = X86_OP_ENTRY1(multi0F, nop,v, nolea),  /* unconverted MPX */
1303    [0x1c] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1304    [0x1d] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1305    [0x1e] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1306    [0x1f] = X86_OP_ENTRY1(NOP,  nop,v),  /* NOP/reserved NOP */
1307
1308    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
1309    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
1310    [0x2A] = X86_OP_GROUP0(0F2A),
1311    [0x2B] = X86_OP_GROUP0(0F2B),
1312    [0x2C] = X86_OP_GROUP0(0F2C),
1313    [0x2D] = X86_OP_GROUP0(0F2D),
1314    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
1315    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */
1316
1317    [0x38] = X86_OP_GROUP0(0F38),
1318    [0x3a] = X86_OP_GROUP0(0F3A),
1319
1320    [0x48] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1321    [0x49] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1322    [0x4a] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1323    [0x4b] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1324    [0x4c] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1325    [0x4d] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1326    [0x4e] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1327    [0x4f] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1328
1329    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1330    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1331    [0x5a] = X86_OP_GROUP0(0F5A),
1332    [0x5b] = X86_OP_GROUP0(0F5B),
1333    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1334    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1335    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1336    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1337
1338    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1339    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1340    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1341    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1342    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1343    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1344    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
1345    [0x6f] = X86_OP_GROUP0(0F6F),
1346
1347    [0x78] = X86_OP_GROUP0(0F78),
1348    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
1349    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1350    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1351    [0x7e] = X86_OP_GROUP0(0F7E),
1352    [0x7f] = X86_OP_GROUP0(0F7F),
1353
1354    [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
1355    [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
1356    [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
1357    [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
1358    [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
1359    [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
1360    [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
1361    [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),
1362
1363    [0x98] = X86_OP_ENTRYw(SETcc, E,b),
1364    [0x99] = X86_OP_ENTRYw(SETcc, E,b),
1365    [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
1366    [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
1367    [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
1368    [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
1369    [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
1370    [0x9f] = X86_OP_ENTRYw(SETcc, E,b),
1371
1372    [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
1373    [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
1374    [0xaa] = X86_OP_ENTRY0(RSM,             chk(smm) svm(RSM)),
1375    [0xab] = X86_OP_ENTRY2(BTS,    E,v, G,v,             btEvGv),
1376    [0xac] = X86_OP_ENTRY4(SHRD,   E,v, 2op,v, G,v),
1377    [0xad] = X86_OP_ENTRY3(SHRD,   E,v, 2op,v, G,v),
1378    [0xae] = X86_OP_GROUP0(group15),
1379    /*
1380     * It's slightly more efficient to put the Ev operand in T0 and allow gen_IMUL3
1381     * to assume sextT0.  Multiplication is commutative anyway.
1382     */
1383    [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
1384
1385    [0xb8] = X86_OP_GROUP0(0FB8),
1386    /* decoded as modrm, which is visible as a difference between page fault and #UD */
1387    [0xb9] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD1 */
1388    [0xba] = X86_OP_GROUP2(group8, E,v, I,b),
1389    [0xbb] = X86_OP_ENTRY2(BTC,    E,v, G,v,             btEvGv),
1390    [0xbc] = X86_OP_GROUP0(0FBC),
1391    [0xbd] = X86_OP_GROUP0(0FBD),
1392    [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
1393    [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
1394
1395    [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1396    [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1397    [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1398    [0xcb] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1399    [0xcc] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1400    [0xcd] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1401    [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1402    [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1403
1404    /* Incorrectly missing from Table 2-17 of the manual */
1405    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1406    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1407    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1408    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1409    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1410    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1411    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1412    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1413
1414    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1415    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1416    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1417    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1418    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1419    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1420    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1421    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1422
1423    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1424    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1425    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1426    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1427    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1428    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1429    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1430    [0xff] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD0 */
1431};
1432
1433static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1434{
1435    *entry = opcodes_0F[*b];
1436}
1437
1438static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1439{
1440    *b = x86_ldub_code(env, s);
1441    do_decode_0F(s, env, entry, b);
1442}
1443
1444static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1445{
1446    static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
1447    static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
1448    static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0);
1449    if (!CODE64(s)) {
1450        *entry = arpl;
1451    } else if (REX_W(s)) {
1452        *entry = movsxd;
1453    } else {
1454        *entry = mov;
1455    }
1456}
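
/*
 * For example, 63 C3 is "arpl bx, ax" outside 64-bit mode but a plain
 * "mov eax, ebx" in 64-bit mode, while 48 63 C3 (REX.W) is
 * "movsxd rax, ebx", with the sign extension performed by sextT0.
 */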
1457
1458static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1459{
1460    static const X86GenFunc group1_gen[8] = {
1461        gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB,
1462    };
1463    int op = (get_modrm(s, env) >> 3) & 7;
1464    entry->gen = group1_gen[op];
1465
1466    if (op == 7) {
1467        /* prevent writeback for CMP */
1468        entry->op1 = entry->op0;
1469        entry->op0 = X86_TYPE_None;
1470        entry->s0 = X86_SIZE_None;
1471    } else {
1472        entry->special = X86_SPECIAL_HasLock;
1473    }
1474}
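
/*
 * For example, 80 F9 05 (reg=7) is CMP CL, 5: it reuses gen_SUB, and
 * moving the E operand from op0 to op1 above makes it read-only, so only
 * the flags are written back; this is also why reg=7 does not get HasLock.
 */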
1475
1476static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1477{
1478    int op = (get_modrm(s, env) >> 3) & 7;
1479    if (op != 0) {
1480        /* could be XOP prefix too */
1481        *entry = UNKNOWN_OPCODE;
1482    } else {
1483        entry->gen = gen_POP;
1484        /* The address must use the value of ESP after the pop.  */
1485        s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag);
1486    }
1487}
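
/*
 * For example, 8F 04 24 is "pop dword ptr [esp]"; popl_esp_hack makes the
 * effective address computation see the incremented ESP, matching the
 * architectural rule that a pop whose destination addresses memory
 * through ESP uses the stack pointer's value after the pop.
 */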
1488
1489static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1490{
1491    static const X86GenFunc group2_gen[8] = {
1492        gen_ROL, gen_ROR, gen_RCL, gen_RCR,
1493        gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
1494    };
1495    int op = (get_modrm(s, env) >> 3) & 7;
1496    entry->gen = group2_gen[op];
1497    if (op == 7) {
1498        entry->special = X86_SPECIAL_SExtT0;
1499    } else {
1500        entry->special = X86_SPECIAL_ZExtT0;
1501    }
1502}
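
/*
 * For example, with AL = 0x80, "sar al, 1" (D0 F8) needs the sign-extended
 * value in T0 to produce 0xC0, while "shr al, 1" (D0 E8) needs the
 * zero-extended value to produce 0x40; hence the op == 7 special case.
 */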
1503
1504static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1505{
1506    static const X86OpEntry opcodes_grp3[16] = {
1507        /* 0xf6 */
1508        [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),   /* TEST: AND without writeback */
1509        [0x02] = X86_OP_ENTRY1(NOT,  E,b,      lock),
1510        [0x03] = X86_OP_ENTRY1(NEG,  E,b,      lock),
1511        [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
1512        [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
1513        [0x06] = X86_OP_ENTRYr(DIV,  E,b),
1514        [0x07] = X86_OP_ENTRYr(IDIV, E,b),
1515
1516        /* 0xf7 */
1517        [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),   /* TEST: AND without writeback */
1518        [0x0a] = X86_OP_ENTRY1(NOT,  E,v,      lock),
1519        [0x0b] = X86_OP_ENTRY1(NEG,  E,v,      lock),
1520        [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
1521        [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
1522        [0x0e] = X86_OP_ENTRYr(DIV,  E,v),
1523        [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
1524    };
1525
1526    int w = (*b & 1);
1527    int reg = (get_modrm(s, env) >> 3) & 7;
1528
1529    *entry = opcodes_grp3[(w << 3) | reg];
1530}
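
/*
 * For example, F7 DB is NEG EBX: w = 1 comes from the low bit of the
 * opcode and reg = 3 from the modrm byte, so the lookup selects
 * entry 0x0b above.
 */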
1531
1532static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1533{
1534    static const X86OpEntry opcodes_grp4_5[16] = {
1535        /* 0xfe */
1536        [0x00] = X86_OP_ENTRY1(INC,     E,b,                           lock),
1537        [0x01] = X86_OP_ENTRY1(DEC,     E,b,                           lock),
1538
1539        /* 0xff */
1540        [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
1541        [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
1542        [0x0a] = X86_OP_ENTRYr(CALL_m,  E,f64,                         zextT0),
1543        [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
1544        [0x0c] = X86_OP_ENTRYr(JMP_m,   E,f64,                         zextT0),
1545        [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
1546        [0x0e] = X86_OP_ENTRYr(PUSH,    E,d64),
1547    };
1548
1549    int w = (*b & 1);
1550    int reg = (get_modrm(s, env) >> 3) & 7;
1551
1552    *entry = opcodes_grp4_5[(w << 3) | reg];
1553}
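
/*
 * For example, FF 30 is "push dword ptr [eax]" (w = 1, reg = 6, entry
 * 0x0e), while FE only defines /0 and /1, so FE 30 hits a zeroed entry
 * and is treated as an unknown opcode.
 */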
1554
1556static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1557{
1558    int op = (get_modrm(s, env) >> 3) & 7;
1559    if (op != 0) {
1560        *entry = UNKNOWN_OPCODE;
1561    } else {
1562        entry->gen = gen_MOV;
1563    }
1564}
1565
1566static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1567{
1568    static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
1569    static X86OpEntry nop = X86_OP_ENTRY0(NOP);
1570    static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
1571
1572    if (REX_B(s)) {
1573        *entry = xchg_ax;
1574    } else {
1575        *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
1576    }
1577}
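
/*
 * For example, a bare 90 is NOP rather than "xchg eax, eax", F3 90 is
 * PAUSE, and in 64-bit mode 41 90 is a real XCHG R8D, EAX because REX.B
 * makes the two operands distinct registers.
 */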
1578
1579static const X86OpEntry opcodes_root[256] = {
1580    [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
1581    [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
1582    [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
1583    [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
1584    [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock),   /* AL, Ib */
1585    [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock),   /* rAX, Iz */
1586    [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
1587    [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
1588
1589    [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
1590    [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
1591    [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
1592    [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
1593    [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock),   /* AL, Ib */
1594    [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock),   /* rAX, Iz */
1595    [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
1596    [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
1597
1598    [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
1599    [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
1600    [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
1601    [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
1602    [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock),   /* AL, Ib */
1603    [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock),   /* rAX, Iz */
1604    [0x26] = {},
1605    [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
1606
1607    [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
1608    [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
1609    [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
1610    [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
1611    [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock),   /* AL, Ib */
1612    [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock),   /* rAX, Iz */
1613    [0x36] = {},
1614    [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
1615
1616    [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
1617    [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
1618    [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
1619    [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
1620    [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
1621    [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
1622    [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
1623    [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
1624
1625    [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1626    [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1627    [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1628    [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1629    [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1630    [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1631    [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1632    [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1633
1634    [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
1635    [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
1636    [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
1637    [0x63] = X86_OP_GROUP0(63),
1638    [0x64] = {},
1639    [0x65] = {},
1640    [0x66] = {},
1641    [0x67] = {},
1642
1643    [0x70] = X86_OP_ENTRYr(Jcc, J,b),
1644    [0x71] = X86_OP_ENTRYr(Jcc, J,b),
1645    [0x72] = X86_OP_ENTRYr(Jcc, J,b),
1646    [0x73] = X86_OP_ENTRYr(Jcc, J,b),
1647    [0x74] = X86_OP_ENTRYr(Jcc, J,b),
1648    [0x75] = X86_OP_ENTRYr(Jcc, J,b),
1649    [0x76] = X86_OP_ENTRYr(Jcc, J,b),
1650    [0x77] = X86_OP_ENTRYr(Jcc, J,b),
1651
1652    [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
1653    [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
1654    [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
1655    [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
1656    [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),
1657    [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),
1658    [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
1659    [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
1660
1661    [0x90] = X86_OP_GROUP0(90),
1662    [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1663    [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1664    [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1665    [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1666    [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1667    [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1668    [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1669
1670    [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
1671    [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
1672    [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
1673    [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
1674    [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
1675    [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
1676    [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
1677    [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
1678
1679    [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1680    [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1681    [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1682    [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1683    [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1684    [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1685    [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1686    [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1687
1688    [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
1689    [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
1690    [0xC2] = X86_OP_ENTRYr(RET, I,w),
1691    [0xC3] = X86_OP_ENTRY0(RET),
1692    [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)),
1693    [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, chk(i64)),
1694    [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
1695    [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
1696
1697    [0xD0] = X86_OP_GROUP1(group2, E,b),
1698    [0xD1] = X86_OP_GROUP1(group2, E,v),
1699    [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
1700    [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
1701    [0xD4] = X86_OP_ENTRY2(AAM, 0,w, I,b),
1702    [0xD5] = X86_OP_ENTRY2(AAD, 0,w, I,b),
1703    [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
1704    [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
1705
1706    [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
1707    [0xE1] = X86_OP_ENTRYr(LOOPE,  J,b), /* implicit: CX with aflag size */
1708    [0xE2] = X86_OP_ENTRYr(LOOP,   J,b), /* implicit: CX with aflag size */
1709    [0xE3] = X86_OP_ENTRYr(JCXZ,   J,b), /* implicit: CX with aflag size */
1710    [0xE4] = X86_OP_ENTRYwr(IN,    0,b, I_unsigned,b), /* AL */
1711    [0xE5] = X86_OP_ENTRYwr(IN,    0,z, I_unsigned,b), /* AX/EAX */
1712    [0xE6] = X86_OP_ENTRYrr(OUT,   0,b, I_unsigned,b), /* AL */
1713    [0xE7] = X86_OP_ENTRYrr(OUT,   0,z, I_unsigned,b), /* AX/EAX */
1714
1715    [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
1716    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0) svm(HLT)),
1717    [0xF5] = X86_OP_ENTRY0(CMC),
1718    [0xF6] = X86_OP_GROUP1(group3, E,b),
1719    [0xF7] = X86_OP_GROUP1(group3, E,v),
1720
1721    [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
1722    [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
1723    [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
1724    [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
1725    [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock),   /* AL, Ib */
1726    [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock),   /* rAX, Iz */
1727    [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
1728    [0x0F] = X86_OP_GROUP0(0F),
1729
1730    [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
1731    [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
1732    [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
1733    [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
1734    [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock),   /* AL, Ib */
1735    [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock),   /* rAX, Iz */
1736    [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
1737    [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
1738
1739    [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
1740    [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
1741    [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
1742    [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
1743    [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock),   /* AL, Ib */
1744    [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock),   /* rAX, Iz */
1745    [0x2E] = {},
1746    [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
1747
1748    [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
1749    [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
1750    [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
1751    [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
1752    [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b),   /* AL, Ib */
1753    [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z),   /* rAX, Iz */
1754    [0x3E] = {},
1755    [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
1756
1757    [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
1758    [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
1759    [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
1760    [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
1761    [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
1762    [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
1763    [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
1764    [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
1765
1766    [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
1767    [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
1768    [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
1769    [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
1770    [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
1771    [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
1772    [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
1773    [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
1774
1775    [0x68] = X86_OP_ENTRYr(PUSH, I,z),
1776    [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
1777    [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
1778    [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
1779    [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
1780    [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
1781    [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
1782    [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
1783
1784    [0x78] = X86_OP_ENTRYr(Jcc, J,b),
1785    [0x79] = X86_OP_ENTRYr(Jcc, J,b),
1786    [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
1787    [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
1788    [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
1789    [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
1790    [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
1791    [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
1792
1793    [0x88] = X86_OP_ENTRYwr(MOV, E,b, G,b),
1794    [0x89] = X86_OP_ENTRYwr(MOV, E,v, G,v),
1795    [0x8A] = X86_OP_ENTRYwr(MOV, G,b, E,b),
1796    [0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v),
1797    /* Missing in Table A-2: memory destination is always 16-bit.  */
1798    [0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
1799    [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea),
1800    [0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
1801    [0x8F] = X86_OP_GROUPw(group1A, E,d64),
1802
1803    [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
1804    [0x99] = X86_OP_ENTRYwr(CWD,   2,v, 0,v), /* rDX, rAX */
1805    [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
1806    [0x9B] = X86_OP_ENTRY0(WAIT),
1807    [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
1808    [0x9D] = X86_OP_ENTRY0(POPF,   chk(vm86_iopl) svm(POPF)),
1809    [0x9E] = X86_OP_ENTRY0(SAHF),
1810    [0x9F] = X86_OP_ENTRY0(LAHF),
1811
1812    [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* AL, Ib */
1813    [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* rAX, Iz */
1814    [0xAA] = X86_OP_ENTRYwr(STOS, Y,b, 0,b),
1815    [0xAB] = X86_OP_ENTRYwr(STOS, Y,v, 0,v),
1816    /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
1817    [0xAC] = X86_OP_ENTRYr(LODS, X,b),
1818    [0xAD] = X86_OP_ENTRYr(LODS, X,v),
1819    [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
1820    [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
1821
1822    [0xB8] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1823    [0xB9] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1824    [0xBA] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1825    [0xBB] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1826    [0xBC] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1827    [0xBD] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1828    [0xBE] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1829    [0xBF] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
1830
1831    [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
1832    [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
1833    [0xCA] = X86_OP_ENTRYr(RETF,  I,w),
1834    [0xCB] = X86_OP_ENTRY0(RETF),
1835    [0xCC] = X86_OP_ENTRY0(INT3),
1836    [0xCD] = X86_OP_ENTRYr(INT, I,b,  chk(vm86_iopl)),
1837    [0xCE] = X86_OP_ENTRY0(INTO),
1838    [0xCF] = X86_OP_ENTRY0(IRET,      chk(vm86_iopl) svm(IRET)),
1839
1840    /*
1841     * x87 is nolea because it needs the address without segment base,
1842     * in order to store it in fdp.
1843     */
1844    [0xD8] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1845    [0xD9] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1846    [0xDA] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1847    [0xDB] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1848    [0xDC] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1849    [0xDD] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1850    [0xDE] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1851    [0xDF] = X86_OP_ENTRY1(x87,    nop,v, nolea),
1852
1853    [0xE8] = X86_OP_ENTRYr(CALL,   J,z_f64),
1854    [0xE9] = X86_OP_ENTRYr(JMP,    J,z_f64),
1855    [0xEA] = X86_OP_ENTRYrr(JMPF,  I_unsigned,p, I_unsigned,w, chk(i64)),
1856    [0xEB] = X86_OP_ENTRYr(JMP,    J,b),
1857    [0xEC] = X86_OP_ENTRYwr(IN,    0,b, 2,w), /* AL, DX */
1858    [0xED] = X86_OP_ENTRYwr(IN,    0,z, 2,w), /* AX/EAX, DX */
1859    [0xEE] = X86_OP_ENTRYrr(OUT,   0,b, 2,w), /* DX, AL */
1860    [0xEF] = X86_OP_ENTRYrr(OUT,   0,z, 2,w), /* DX, AX/EAX */
1861
1862    [0xF8] = X86_OP_ENTRY0(CLC),
1863    [0xF9] = X86_OP_ENTRY0(STC),
1864    [0xFA] = X86_OP_ENTRY0(CLI,    chk(iopl)),
1865    [0xFB] = X86_OP_ENTRY0(STI,    chk(iopl)),
1866    [0xFC] = X86_OP_ENTRY0(CLD),
1867    [0xFD] = X86_OP_ENTRY0(STD),
1868    [0xFE] = X86_OP_GROUP1(group4_5, E,b),
1869    [0xFF] = X86_OP_GROUP1(group4_5, E,v),
1870};
1871
1872#undef mmx
1873#undef vex1
1874#undef vex2
1875#undef vex3
1876#undef vex4
1877#undef vex4_unal
1878#undef vex5
1879#undef vex6
1880#undef vex7
1881#undef vex8
1882#undef vex11
1883#undef vex12
1884#undef vex13
1885
1886/*
1887 * Decode the fixed part of the opcode and place the last byte
1888 * of the opcode in b.
1889 */
1890static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1891{
1892    *entry = opcodes_root[*b];
1893}
1894
1895
1896static int decode_modrm(DisasContext *s, CPUX86State *env,
1897                        X86DecodedInsn *decode, X86DecodedOp *op)
1898{
1899    int modrm = get_modrm(s, env);
1900    if ((modrm >> 6) == 3) {
1901        op->n = (modrm & 7);
1902        if (op->unit != X86_OP_MMX) {
1903            op->n |= REX_B(s);
1904        }
1905    } else {
1906        op->has_ea = true;
1907        op->n = -1;
1908        decode->mem = gen_lea_modrm_0(env, s, modrm,
1909                                      decode->e.vex_class == 12);
1910    }
1911    return modrm;
1912}
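
/*
 * For illustration: modrm C1 (mod=11) selects register ECX directly (or
 * R9 with REX.B), while modrm 41 (mod=01) selects the memory operand
 * [ECX + disp8], leaving op->n = -1 and the parsed address in
 * decode->mem.
 */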
1913
1914static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1915{
1916    switch (size) {
1917    case X86_SIZE_b:  /* byte */
1918        *ot = MO_8;
1919        return true;
1920
1921    case X86_SIZE_d:  /* 32-bit */
1922    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1923        *ot = MO_32;
1924        return true;
1925
1926    case X86_SIZE_p:  /* Far pointer, return offset size */
1927    case X86_SIZE_s:  /* Descriptor, return offset size */
1928    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1929        *ot = s->dflag;
1930        return true;
1931
1932    case X86_SIZE_pi: /* MMX */
1933    case X86_SIZE_q:  /* 64-bit */
1934    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1935        *ot = MO_64;
1936        return true;
1937
1938    case X86_SIZE_w:  /* 16-bit */
1939        *ot = MO_16;
1940        return true;
1941
1942    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1943        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1944        return true;
1945
1946    case X86_SIZE_y_d64:  /* Full (not 16-bit) register access */
1947        *ot = CODE64(s) ? MO_64 : MO_32;
1948        return true;
1949
1950    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1951        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1952        return true;
1953
1954    case X86_SIZE_z_f64:  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
1955        *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
1956        return true;
1957
1958    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1959        if (e->special == X86_SPECIAL_MMX &&
1960            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1961            *ot = MO_64;
1962            return true;
1963        }
1964        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1965            return false;
1966        }
1967        *ot = MO_128;
1968        return true;
1969
1970    case X86_SIZE_qq: /* AVX 256-bit */
1971        if (!s->vex_l) {
1972            return false;
1973        }
1974        *ot = MO_256;
1975        return true;
1976
1977    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1978        if (e->special == X86_SPECIAL_MMX &&
1979            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1980            *ot = MO_64;
1981            return true;
1982        }
1983        /* fall through */
1984    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1985    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1986        *ot = s->vex_l ? MO_256 : MO_128;
1987        return true;
1988
1989    case X86_SIZE_xh: /* SSE/AVX packed half register */
1990        *ot = s->vex_l ? MO_128 : MO_64;
1991        return true;
1992
1993    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1994        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1995        return true;
1996
1997    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1998        *ot = CODE64(s) ? MO_64 : s->dflag;
1999        return true;
2000
2001    default:
2002        *ot = -1;
2003        return true;
2004    }
2005}
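
/*
 * For example, in 32-bit mode "v" is MO_32 by default and MO_16 with a
 * 66 prefix; in 64-bit mode "z" never exceeds MO_32 even with REX.W,
 * which is why most immediates are at most 4 bytes (the B8..BF "Iv"
 * moves being the exception).
 */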
2006
2007static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
2008                      X86DecodedOp *op, X86OpType type, int b)
2009{
2010    int modrm;
2011
2012    switch (type) {
2013    case X86_TYPE_None:  /* Implicit or absent */
2014    case X86_TYPE_A:  /* Implicit */
2015    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
2016    case X86_TYPE_X:  /* string source */
2017    case X86_TYPE_Y:  /* string destination */
2018        break;
2019
2020    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
2021        op->unit = X86_OP_INT;
2022        op->n = s->vex_v;
2023        break;
2024
2025    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
2026        op->unit = X86_OP_CR;
2027        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
2028        if (op->n == 0 && (s->prefix & PREFIX_LOCK) &&
2029            (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
2030            op->n = 8;
2031            s->prefix &= ~PREFIX_LOCK;
2032        }
2033        if (op->n != 0 && op->n != 2 && op->n != 3 && op->n != 4 && op->n != 8) {
2034            return false;
2035        }
2036        if (decode->e.intercept) {
2037            decode->e.intercept += op->n;
2038        }
2039        break;
2040
2041    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
2042        op->unit = X86_OP_DR;
2043        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
2044        if (op->n >= 8) {
2045            /*
2046             * illegal opcode.  The DR4 and DR5 case is checked in the generated
2047             * code instead, to save on hflags bits.
2048             */
2049            return false;
2050        }
2051        if (decode->e.intercept) {
2052            decode->e.intercept += op->n;
2053        }
2054        break;
2055
2056    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
2057        op->unit = X86_OP_INT;
2058        goto get_reg;
2059
2060    case X86_TYPE_S:  /* reg selects a segment register */
2061        op->unit = X86_OP_SEG;
2062        goto get_reg;
2063
2064    case X86_TYPE_P:
2065        op->unit = X86_OP_MMX;
2066        goto get_reg;
2067
2068    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
2069        if (decode->e.special == X86_SPECIAL_MMX &&
2070            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
2071            op->unit = X86_OP_MMX;
2072        } else {
2073            op->unit = X86_OP_SSE;
2074        }
2075    get_reg:
2076        op->n = ((get_modrm(s, env) >> 3) & 7);
2077        if (op->unit != X86_OP_MMX) {
2078            op->n |= REX_R(s);
2079        }
2080        break;
2081
2082    case X86_TYPE_E:  /* ALU modrm operand */
2083        op->unit = X86_OP_INT;
2084        goto get_modrm;
2085
2086    case X86_TYPE_Q:  /* MMX modrm operand */
2087        op->unit = X86_OP_MMX;
2088        goto get_modrm;
2089
2090    case X86_TYPE_W:  /* XMM/YMM modrm operand */
2091        if (decode->e.special == X86_SPECIAL_MMX &&
2092            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
2093            op->unit = X86_OP_MMX;
2094        } else {
2095            op->unit = X86_OP_SSE;
2096        }
2097        goto get_modrm;
2098
2099    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
2100        op->unit = X86_OP_MMX;
2101        goto get_modrm_reg;
2102
2103    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
2104        if (decode->e.special == X86_SPECIAL_MMX &&
2105            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
2106            op->unit = X86_OP_MMX;
2107        } else {
2108            op->unit = X86_OP_SSE;
2109        }
2110        goto get_modrm_reg;
2111
2112    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
2113        op->unit = X86_OP_INT;
2114    get_modrm_reg:
2115        modrm = get_modrm(s, env);
2116        if ((modrm >> 6) != 3) {
2117            return false;
2118        }
2119        goto get_modrm;
2120
2121    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
2122        op->unit = X86_OP_SSE;
2123        goto get_modrm_mem;
2124
2125    case X86_TYPE_EM:  /* modrm byte selects an ALU memory operand */
2126        op->unit = X86_OP_INT;
2127        /* fall through */
2128    case X86_TYPE_M:  /* modrm byte selects a memory operand */
2129    get_modrm_mem:
2130        modrm = get_modrm(s, env);
2131        if ((modrm >> 6) == 3) {
2132            return false;
2133        }
2134        /* fall through */
2135    case X86_TYPE_nop:  /* modrm operand decoded but not fetched */
2136    get_modrm:
2137        decode_modrm(s, env, decode, op);
2138        break;
2139
2140    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
2141        op->unit = X86_OP_INT;
2142        op->has_ea = true;
2143        op->n = -1;
2144        decode->mem = (AddressParts) {
2145            .def_seg = R_DS,
2146            .base = -1,
2147            .index = -1,
2148            .disp = insn_get_addr(env, s, s->aflag)
2149        };
2150        break;
2151
2152    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
2153        if ((s->prefix & PREFIX_VEX)) {
2154            op->unit = X86_OP_SSE;
2155            op->n = s->vex_v;
2156            break;
2157        }
2158        if (op == &decode->op[0]) {
2159            /* shifts place the destination in VEX.vvvv, use modrm */
2160            return decode_op(s, env, decode, op, decode->e.op1, b);
2161        } else {
2162            return decode_op(s, env, decode, op, decode->e.op0, b);
2163        }
2164
2165    case X86_TYPE_I:  /* Immediate */
2166    case X86_TYPE_J:  /* Relative offset for a jump */
2167        op->unit = X86_OP_IMM;
2168        decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
2169        break;
2170
2171    case X86_TYPE_I_unsigned:  /* Immediate */
2172        op->unit = X86_OP_IMM;
2173        decode->immediate = op->imm = insn_get(env, s, op->ot);
2174        break;
2175
2176    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
2177        op->n = insn_get(env, s, op->ot) >> 4;
2178        break;
2179
2180    case X86_TYPE_2op:
2181        *op = decode->op[0];
2182        break;
2183
2184    case X86_TYPE_LoBits:
2185        op->n = (b & 7) | REX_B(s);
2186        op->unit = X86_OP_INT;
2187        break;
2188
2189    case X86_TYPE_0 ... X86_TYPE_7:
2190        op->n = type - X86_TYPE_0;
2191        op->unit = X86_OP_INT;
2192        break;
2193
2194    case X86_TYPE_ES ... X86_TYPE_GS:
2195        op->n = type - X86_TYPE_ES;
2196        op->unit = X86_OP_SEG;
2197        break;
2198    }
2199
2200    return true;
2201}
2202
2203static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
2204{
2205    uint16_t sse_prefixes;
2206
2207    if (!e->valid_prefix) {
2208        return true;
2209    }
2210    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2211        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
2212        s->prefix &= ~PREFIX_DATA;
2213    }
2214
2215    /* Now, either zero or one bit is set in sse_prefixes.  */
2216    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
2217    return e->valid_prefix & (1 << sse_prefixes);
2218}
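
/*
 * For example (assuming the usual expansion of the p_* markers into
 * P_00/P_66/P_F3/P_F2 bits of valid_prefix), an entry marked p_00_66
 * accepts no prefix or 66 but not F3/F2: if s->prefix has PREFIX_REPZ
 * set, 1 << sse_prefixes picks the P_F3 bit, which is clear in that
 * entry's valid_prefix.
 */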
2219
2220static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
2221                        X86DecodedInsn *decode)
2222{
2223    X86OpEntry *e = &decode->e;
2224
2225    decode_func(s, env, e, &decode->b);
2226    while (e->is_decode) {
2227        e->is_decode = false;
2228        e->decode(s, env, e, &decode->b);
2229    }
2230
2231    if (!validate_sse_prefix(s, e)) {
2232        return false;
2233    }
2234
2235    /* First compute size of operands in order to initialize s->rip_offset.  */
2236    if (e->op0 != X86_TYPE_None) {
2237        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
2238            return false;
2239        }
2240        if (e->op0 == X86_TYPE_I) {
2241            s->rip_offset += 1 << decode->op[0].ot;
2242        }
2243    }
2244    if (e->op1 != X86_TYPE_None) {
2245        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
2246            return false;
2247        }
2248        if (e->op1 == X86_TYPE_I) {
2249            s->rip_offset += 1 << decode->op[1].ot;
2250        }
2251    }
2252    if (e->op2 != X86_TYPE_None) {
2253        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
2254            return false;
2255        }
2256        if (e->op2 == X86_TYPE_I) {
2257            s->rip_offset += 1 << decode->op[2].ot;
2258        }
2259    }
2260    if (e->op3 != X86_TYPE_None) {
2261        /*
2262         * A couple of instructions actually use the extra immediate byte for an Lx
2263         * register operand; those are handled in the gen_* functions as one-offs.
2264         */
2265        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
2266        s->rip_offset += 1;
2267    }
2268
2269    if (e->op0 != X86_TYPE_None &&
2270        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
2271        return false;
2272    }
2273
2274    if (e->op1 != X86_TYPE_None &&
2275        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
2276        return false;
2277    }
2278
2279    if (e->op2 != X86_TYPE_None &&
2280        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
2281        return false;
2282    }
2283
2284    if (e->op3 != X86_TYPE_None) {
2285        decode->immediate = insn_get_signed(env, s, MO_8);
2286    }
2287
2288    return true;
2289}
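
/*
 * Computing operand sizes before fetching the operands matters for
 * RIP-relative addressing: for C7 05 disp32 imm32 ("mov dword ptr
 * [rip+disp32], imm32"), rip_offset = 4 so that the displacement is
 * applied relative to the end of the instruction, past the immediate.
 */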
2290
2291static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
2292{
2293    switch (cpuid) {
2294    case X86_FEAT_None:
2295        return true;
2296    case X86_FEAT_CMOV:
2297        return (s->cpuid_features & CPUID_CMOV);
2298    case X86_FEAT_CLFLUSH:
2299        return (s->cpuid_features & CPUID_CLFLUSH);
2300    case X86_FEAT_CX8:
2301        return (s->cpuid_features & CPUID_CX8);
2302    case X86_FEAT_FXSR:
2303        return (s->cpuid_features & CPUID_FXSR);
2304    case X86_FEAT_CX16:
2305        return (s->cpuid_ext_features & CPUID_EXT_CX16);
2306    case X86_FEAT_F16C:
2307        return (s->cpuid_ext_features & CPUID_EXT_F16C);
2308    case X86_FEAT_FMA:
2309        return (s->cpuid_ext_features & CPUID_EXT_FMA);
2310    case X86_FEAT_MOVBE:
2311        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
2312    case X86_FEAT_PCLMULQDQ:
2313        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
2314    case X86_FEAT_POPCNT:
2315        return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
2316    case X86_FEAT_SSE:
2317        return (s->cpuid_features & CPUID_SSE);
2318    case X86_FEAT_SSE2:
2319        return (s->cpuid_features & CPUID_SSE2);
2320    case X86_FEAT_SSE3:
2321        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
2322    case X86_FEAT_SSSE3:
2323        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
2324    case X86_FEAT_SSE41:
2325        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
2326    case X86_FEAT_SSE42:
2327        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
2328    case X86_FEAT_AES:
2329        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
2330            return false;
2331        } else if (!(s->prefix & PREFIX_VEX)) {
2332            return true;
2333        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
2334            return false;
2335        } else {
2336            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
2337        }
2338
2339    case X86_FEAT_AVX:
2340        return (s->cpuid_ext_features & CPUID_EXT_AVX);
2341    case X86_FEAT_XSAVE:
2342        return (s->cpuid_ext_features & CPUID_EXT_XSAVE);
2343
2344    case X86_FEAT_3DNOW:
2345        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
2346    case X86_FEAT_SSE4A:
2347        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
2348
2349    case X86_FEAT_ADX:
2350        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
2351    case X86_FEAT_BMI1:
2352        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
2353    case X86_FEAT_BMI2:
2354        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
2355    case X86_FEAT_AVX2:
2356        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
2357    case X86_FEAT_CLFLUSHOPT:
2358        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT);
2359    case X86_FEAT_CLWB:
2360        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB);
2361    case X86_FEAT_FSGSBASE:
2362        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE);
2363    case X86_FEAT_SHA_NI:
2364        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
2365
2366    case X86_FEAT_CMPCCXADD:
2367        return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
2368
2369    case X86_FEAT_XSAVEOPT:
2370        return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT);
2371    }
2372    g_assert_not_reached();
2373}
2374
2375static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
2376{
2377    X86OpEntry *e = &decode->e;
2378
2379    switch (e->vex_special) {
2380    case X86_VEX_REPScalar:
2381        /*
2382         * Instructions which differ between 00/66 and F2/F3 in the
2383         * exception classification and the size of the memory operand.
2384         */
2385        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
2386        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2387            e->vex_class = e->vex_class < 4 ? 3 : 5;
2388            if (s->vex_l) {
2389                goto illegal;
2390            }
2391            assert(decode->e.s2 == X86_SIZE_x);
2392            if (decode->op[2].has_ea) {
2393                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
2394            }
2395        }
2396        break;
2397
2398    case X86_VEX_SSEUnaligned:
2399        /* handled in sse_needs_alignment.  */
2400        break;
2401
2402    case X86_VEX_AVX2_256:
2403        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
2404            goto illegal;
2405        }
2406    }
2407
2408    switch (e->vex_class) {
2409    case 0:
2410        if (s->prefix & PREFIX_VEX) {
2411            goto illegal;
2412        }
2413        return true;
2414    case 1:
2415    case 2:
2416    case 3:
2417    case 4:
2418    case 5:
2419    case 7:
2420        if (s->prefix & PREFIX_VEX) {
2421            if (!(s->flags & HF_AVX_EN_MASK)) {
2422                goto illegal;
2423            }
2424        } else if (e->special != X86_SPECIAL_MMX ||
2425                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2426            if (!(s->flags & HF_OSFXSR_MASK)) {
2427                goto illegal;
2428            }
2429        }
2430        break;
2431    case 12:
2432        /* Must have a VSIB byte and must not use 16-bit addressing.  */
2433        assert(s->has_modrm);
2434        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
2435            goto illegal;
2436        }
2437
2438        /* Check no overlap between registers.  */
2439        if (!decode->op[0].has_ea &&
2440            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
2441            goto illegal;
2442        }
2443        assert(!decode->op[1].has_ea);
2444        if (decode->op[1].n == decode->mem.index) {
2445            goto illegal;
2446        }
2447        if (!decode->op[2].has_ea &&
2448            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
2449            goto illegal;
2450        }
2451        /* fall through */
2452    case 6:
2453    case 11:
2454        if (!(s->prefix & PREFIX_VEX)) {
2455            goto illegal;
2456        }
2457        if (!(s->flags & HF_AVX_EN_MASK)) {
2458            goto illegal;
2459        }
2460        break;
2461    case 8:
2462        /* Non-VEX case handled in decode_0F77.  */
2463        assert(s->prefix & PREFIX_VEX);
2464        if (!(s->flags & HF_AVX_EN_MASK)) {
2465            goto illegal;
2466        }
2467        break;
2468    case 13:
2469        if (!(s->prefix & PREFIX_VEX)) {
2470            goto illegal;
2471        }
2472        if (s->vex_l) {
2473            goto illegal;
2474        }
2475        /* All integer instructions use VEX.vvvv, so exit.  */
2476        return true;
2477    }
2478
    if (s->vex_v != 0 &&
        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
        goto illegal;
    }

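    /*
     * CR0.TS raises #NM ("device not available") so that the OS can do
     * lazy FPU/vector state switching; CR0.EM instead makes SSE
     * instructions #UD.
     */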
    if (s->flags & HF_TS_MASK) {
        goto nm_exception;
    }
    if (s->flags & HF_EM_MASK) {
        goto illegal;
    }

    if (e->check) {
        if (e->check & X86_CHECK_VEX128) {
            if (s->vex_l) {
                goto illegal;
            }
        }
        if (e->check & X86_CHECK_W0) {
            if (s->vex_w) {
                goto illegal;
            }
        }
        if (e->check & X86_CHECK_W1) {
            if (!s->vex_w) {
                goto illegal;
            }
        }
    }
    return true;

nm_exception:
    gen_NM_exception(s);
    return false;
illegal:
    gen_illegal_opcode(s);
    return false;
}

/*
 * Convert one instruction. s->base.is_jmp is set if the translation must
 * be stopped.
 */
static void disas_insn(DisasContext *s, CPUState *cpu)
{
    CPUX86State *env = cpu_env(cpu);
    X86DecodedInsn decode;
    X86DecodeFunc decode_func = decode_root;
    bool accept_lock = false;
    uint8_t cc_live, b;

    s->pc = s->base.pc_next;
    s->override = -1;
    s->popl_esp_hack = 0;
#ifdef TARGET_X86_64
    s->rex_r = 0;
    s->rex_x = 0;
    s->rex_b = 0;
#endif
    s->rip_offset = 0; /* for relative ip address */
    s->vex_l = 0;
    s->vex_v = 0;
    s->vex_w = false;
    s->has_modrm = false;
    s->prefix = 0;

 next_byte:;
#ifdef TARGET_X86_64
    /*
     * A REX prefix is ignored if other prefixes follow it, so stash it
     * and only honor the value seen last.
     */
    int rex;
    rex = -1;
 next_byte_rex:
#endif
    b = x86_ldub_code(env, s);

    /* Collect prefixes.  */
    switch (b) {
    case 0xf3:
        s->prefix |= PREFIX_REPZ;
        s->prefix &= ~PREFIX_REPNZ;
        goto next_byte;
    case 0xf2:
        s->prefix |= PREFIX_REPNZ;
        s->prefix &= ~PREFIX_REPZ;
        goto next_byte;
    case 0xf0:
        s->prefix |= PREFIX_LOCK;
        goto next_byte;
    case 0x2e:
        s->override = R_CS;
        goto next_byte;
    case 0x36:
        s->override = R_SS;
        goto next_byte;
    case 0x3e:
        s->override = R_DS;
        goto next_byte;
    case 0x26:
        s->override = R_ES;
        goto next_byte;
    case 0x64:
        s->override = R_FS;
        goto next_byte;
    case 0x65:
        s->override = R_GS;
        goto next_byte;
    case 0x66:
        s->prefix |= PREFIX_DATA;
        goto next_byte;
    case 0x67:
        s->prefix |= PREFIX_ADR;
        goto next_byte;
#ifdef TARGET_X86_64
    case 0x40 ... 0x4f:
        if (CODE64(s)) {
            /*
             * REX prefix; ignored unless it is the last prefix, so
             * for now just stash it
             */
            rex = b;
            goto next_byte_rex;
        }
        break;
#endif
    case 0xc5: /* 2-byte VEX */
    case 0xc4: /* 3-byte VEX */
        /*
         * VEX prefixes are valid only in 32-bit and 64-bit mode, and not
         * in vm86 mode; everywhere else the 0xc4/0xc5 bytes decode as the
         * LES and LDS instructions.
         */
        if (CODE32(s) && !VM86(s)) {
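            /*
             * VEX.pp compresses an implied SIMD prefix into two bits:
             * 0 = none, 1 = 0x66, 2 = 0xf3, 3 = 0xf2.
             */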
            static const int pp_prefix[4] = {
                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
            };
            int vex3, vex2 = x86_ldub_code(env, s);

            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                /*
                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
                 * otherwise the instruction is LES or LDS.
                 */
                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
                break;
            }

            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
                             | PREFIX_LOCK | PREFIX_DATA)) {
                goto illegal_op;
            }
#ifdef TARGET_X86_64
            if (rex != -1) {
                goto illegal_op;
            }
            s->rex_r = (~vex2 >> 4) & 8;
#endif
            if (b == 0xc5) {
                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
                vex3 = vex2;
                decode_func = decode_0F;
            } else {
                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
                vex3 = x86_ldub_code(env, s);
#ifdef TARGET_X86_64
                s->rex_x = (~vex2 >> 3) & 8;
                s->rex_b = (~vex2 >> 2) & 8;
#endif
                s->vex_w = (vex3 >> 7) & 1;
                switch (vex2 & 0x1f) {
                case 0x01: /* Implied 0f leading opcode bytes.  */
                    decode_func = decode_0F;
                    break;
                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
                    decode_func = decode_0F38;
                    break;
                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
                    decode_func = decode_0F3A;
                    break;
                default:   /* Reserved for future use.  */
                    goto unknown_op;
                }
            }
            s->vex_v = (~vex3 >> 3) & 0xf;
            s->vex_l = (vex3 >> 2) & 1;
            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
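            /*
             * For example, the 2-byte prefix "c5 f8" (f8 = 11111000b) has
             * R=1 (rex_r = 0), vvvv=1111b (vex_v = 0, no extra register),
             * L=0 and pp=00: VEX.128.0F with no implied SIMD prefix.
             */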
        }
        break;
    default:
        break;
    }

    /* Post-process prefixes.  */
    if (CODE64(s)) {
#ifdef TARGET_X86_64
        if (rex != -1) {
            s->prefix |= PREFIX_REX;
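            /*
             * REX is 0100WRXB.  R, X and B are stored pre-shifted to bit 3
             * (value 8) so that they can be OR'ed directly into ModRM
             * register numbers.
             */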
            s->vex_w = (rex >> 3) & 1;
            s->rex_r = (rex & 0x4) << 1;
            s->rex_x = (rex & 0x2) << 2;
            s->rex_b = (rex & 0x1) << 3;
        }
#endif

        /*
         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
         * over 0x66 if both are present.
         */
        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
    } else {
        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
            s->dflag = MO_32;
        } else {
            s->dflag = MO_16;
        }
        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
            s->aflag = MO_32;
        } else {
            s->aflag = MO_16;
        }
    }

    memset(&decode, 0, sizeof(decode));
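    /*
     * cc_op == -1 means the instruction does not touch the flags; the
     * writeback at the end of this function is then skipped.
     */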
    decode.cc_op = -1;
    decode.b = b;
    if (!decode_insn(s, env, decode_func, &decode)) {
        goto illegal_op;
    }
    if (!decode.e.gen) {
        goto unknown_op;
    }

    if (!has_cpuid_feature(s, decode.e.cpuid)) {
        goto illegal_op;
    }

    /* Checks that result in #UD come first.  */
    if (decode.e.check) {
        if (CODE64(s)) {
            if (decode.e.check & X86_CHECK_i64) {
                goto illegal_op;
            }
            if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) {
                goto illegal_op;
            }
        } else {
            if (decode.e.check & X86_CHECK_o64) {
                goto illegal_op;
            }
            if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) {
                goto illegal_op;
            }
        }
        if (decode.e.check & X86_CHECK_prot_or_vm86) {
            if (!PE(s)) {
                goto illegal_op;
            }
        }
        if (decode.e.check & X86_CHECK_no_vm86) {
            if (VM86(s)) {
                goto illegal_op;
            }
        }
    }

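    /* Apply the X86_SPECIAL_* quirks: LOCK-ability and fixed operand sizes.  */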
    switch (decode.e.special) {
    case X86_SPECIAL_None:
        break;

    case X86_SPECIAL_Locked:
        if (decode.op[0].has_ea) {
            s->prefix |= PREFIX_LOCK;
        }
        /* fall through */
    case X86_SPECIAL_HasLock:
    case X86_SPECIAL_BitTest:
        accept_lock = decode.op[0].has_ea;
        break;

    case X86_SPECIAL_Op0_Rd:
        assert(decode.op[0].unit == X86_OP_INT);
        if (!decode.op[0].has_ea) {
            decode.op[0].ot = MO_32;
        }
        break;

    case X86_SPECIAL_Op2_Ry:
        assert(decode.op[2].unit == X86_OP_INT);
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
        }
        break;

    case X86_SPECIAL_AVXExtMov:
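        /*
         * Presumably for widening moves and conversions: a register source
         * is the full vector, while a memory source keeps the narrower size
         * from the decode table, doubled when VEX.L=1.
         */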
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
        } else if (s->vex_l) {
            decode.op[2].ot++;
        }
        break;

    case X86_SPECIAL_SExtT0:
    case X86_SPECIAL_ZExtT0:
        /* Handled in gen_load.  */
        assert(decode.op[1].unit == X86_OP_INT);
        break;

    case X86_SPECIAL_Op0_Mw:
        assert(decode.op[0].unit == X86_OP_INT);
        if (decode.op[0].has_ea) {
            decode.op[0].ot = MO_16;
        }
        break;

    default:
        break;
    }

    if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
        goto illegal_op;
    }

    if (!validate_vex(s, &decode)) {
        return;
    }

    /*
     * Checks that result in #GP or VMEXIT come second.  Intercepts are
     * generally checked after non-memory exceptions (i.e. after all
     * exceptions if there is no memory operand).  Exceptions are
     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
     *
     * XSETBV will check for CPL0 in the gen_* function instead of using chk().
     */
    if (decode.e.check & X86_CHECK_cpl0) {
        if (CPL(s) != 0) {
            goto gp_fault;
        }
    }
    if (decode.e.has_intercept && unlikely(GUEST(s))) {
        gen_helper_svm_check_intercept(tcg_env,
                                       tcg_constant_i32(decode.e.intercept));
    }
    if (decode.e.check) {
        if ((decode.e.check & X86_CHECK_smm) && !(s->flags & HF_SMM_MASK)) {
            goto illegal_op;
        }
        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
            if (IOPL(s) < 3) {
                goto gp_fault;
            }
        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
            if (IOPL(s) < CPL(s)) {
                goto gp_fault;
            }
        }
    }

    if (decode.e.special == X86_SPECIAL_MMX &&
        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
        gen_helper_enter_mmx(tcg_env);
    }

    if (decode.e.special != X86_SPECIAL_NoLoadEA &&
        (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
        gen_lea_modrm(s, &decode);
    }
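    /*
     * With LOCK, the destination (operand 0) is read and written by a
     * single atomic operation inside the emitter, so gen_load and
     * gen_writeback are not used for it.
     */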
    if (s->prefix & PREFIX_LOCK) {
        assert(decode.op[0].has_ea && !decode.op[2].has_ea);
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, &decode);
    } else {
        if (decode.op[0].unit == X86_OP_MMX) {
            compute_mmx_offset(&decode.op[0]);
        } else if (decode.op[0].unit == X86_OP_SSE) {
            compute_xmm_offset(&decode.op[0]);
        }
        gen_load(s, &decode, 1, s->T0);
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, &decode);
        gen_writeback(s, &decode, 0, s->T0);
    }

    /*
     * Write back flags after last memory access.  Some older ALU instructions, as
     * well as SSE instructions, write flags in the gen_* function, but that can
     * cause incorrect tracking of CC_OP for instructions that write to both memory
     * and flags.
     */
    if (decode.cc_op != -1) {
        if (decode.cc_dst) {
            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
        }
        if (decode.cc_src) {
            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
        }
        if (decode.cc_src2) {
            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
        }
        if (decode.cc_op == CC_OP_DYNAMIC) {
            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
        }
        set_cc_op(s, decode.cc_op);
        cc_live = cc_op_live(decode.cc_op);
    } else {
        cc_live = 0;
    }
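    /*
     * Sanity check: the cc_dst/cc_src/cc_src2 values being written back
     * must match exactly the inputs that the chosen cc_op consumes.
     */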
    if (decode.cc_op != CC_OP_DYNAMIC) {
        assert(!decode.cc_op_dynamic);
        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
    }

    return;
 gp_fault:
    gen_exception_gpf(s);
    return;
 illegal_op:
    gen_illegal_opcode(s);
    return;
 unknown_op:
    gen_unknown_opcode(env, s);
}
