/*
 * New-style decoder for i386 instructions
 *
 *  Copyright (c) 2022 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The decoder is mostly based on tables copied from the Intel SDM.  As
 * a result, most operand load and writeback is done entirely in common
 * table-driven code using the same operand type (X86_TYPE_*) and
 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
 * though.
 *
 * Operand sizes
 * -------------
 *
 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
 *
 * The manual lists immediate far destinations as Ap (technically an implicit
 * argument).  The decoder splits them into two immediates, using "Ip" for
 * the offset part (that comes first in the instruction stream) and "Iw" for
 * the segment/selector part.  The size of the offset is given by s->dflag
 * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
 * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
 *
 * Operand types
 * -------------
 *
 * For memory-only operands, if the emitter function wants to rely on
 * generic load and writeback, the decoder needs to know the type of the
 * operand.  Therefore, M is often replaced by the more specific EM and WM
 * (respectively selecting an ALU operand, like the operand type E, or a
 * vector operand like the operand type W).
 *
 * Immediates are almost always signed or masked away in helpers.  Two
 * common exceptions are IN/OUT and absolute jumps.  For these, there is
 * an additional custom operand type "I_unsigned".  Alternatively, the
 * mask could be applied (and the original sign-extended value would be
 * optimized away by TCG) in the emitter function.
 *
 * Vector operands
 * ---------------
 *
 * The main difference is that the V, U and W types are extended to
 * cover MMX as well; if an instruction is like
 *
 *      por   Pq, Qq
 *  66  por   Vx, Hx, Wx
 *
 * only the second row is included and the instruction is marked as a
 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
 * "x" to "q" if there is no prefix.
 *
 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
 * if the difference is expressed via prefixes.  Individual instructions
 * are separated by prefix in the generator functions.
 *
 * There is a custom size "xh" used to address half of an SSE/AVX operand.
 * This points to a 64-bit operand for SSE operations, a 128-bit operand
 * for 256-bit AVX operands, etc.  It is used for conversion operations
 * such as VCVTPH2PS or VCVTSS2SD.
 *
 * There are a couple of cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
 *
 * VEX exception classes
 * ---------------------
 *
 * Speaking about imprecisions in the manual, the decoder treats all
 * exception-class 4 instructions as having an optional VEX prefix, and
 * all exception-class 6 instructions as having a mandatory VEX prefix.
 * This is true except for a dozen instructions; these are in exception
 * class 4 but do not ignore the VEX.W bit (which does not even exist
 * without a VEX prefix).  These instructions are mostly listed in Intel's
 * table 2-16, but with a few exceptions.
 *
 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
 * they list the VEX.W requirements in the exception classes as well (except
 * when they don't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still use 4 as the
 * primary exception class... except when they don't.
 *
 * The summary is:
 *                       Intel     AMD         VEX.W           note
 * -------------------------------------------------------------------
 * vpblendd              4         4J          0
 * vpblendvb             4         4E-X        0               (*)
 * vpbroadcastq          6         6D          0               (+)
 * vpermd/vpermps        4         4H          0               (§)
 * vpermq/vpermpd        4         4H-1        1               (§)
 * vpermilpd/vpermilps   4         6E          0               (^)
 * vpmaskmovd            6         4K          significant     (^)
 * vpsllv                4         4K          significant
 * vpsrav                4         4J          0
 * vpsrlv                4         4K          significant
 * vtestps/vtestpd       4         4G          0
 *
 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
 *         explain why it is considered exception class 4.  However,
 *         Intel says that VEX-only instructions should be in class 6...
 *
 *    (+)  Not found in Intel's table 2-16
 *
 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
 *         however present in the description of the instruction
 *
 *    (^)  these are the two cases in which Intel and AMD disagree on the
 *         primary exception class
 */

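/*
 * Table-entry construction macros.  X86_OP_GROUP* produces an entry that
 * defers to a decode_* callback (is_decode = true), typically to select a
 * more specific entry based on the prefix or modrm byte; X86_OP_ENTRY*
 * produces an entry that goes straight to a gen_* emitter function.
 */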
#define X86_OP_NONE { 0 },

#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

#define X86_OP_GROUP1(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)
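
/*
 * For illustration only (this is a sketch, not an entry that is used
 * anywhere): an opcode table line such as
 *
 *     X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66)
 *
 * expands to a designated initializer along the lines of
 *
 *     { .gen = gen_PAND,
 *       .op0 = X86_TYPE_V, .s0 = X86_SIZE_x,
 *       .op1 = X86_TYPE_H, .s1 = X86_SIZE_x,
 *       .op2 = X86_TYPE_W, .s2 = X86_SIZE_x,
 *       .vex_class = 4, .valid_prefix = P_00 | P_66, }
 *
 * with the trailing "vex4 p_00_66" attributes supplied by the helper
 * macros defined below.
 */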

/*
 * Short forms that are mostly useful for ALU opcodes and other
 * one-byte opcodes.  For vector instructions it is usually
 * clearer to write all three operands explicitly, because the
 * corresponding gen_* function will use OP_PTRn rather than s->T0
 * and s->T1.
 */
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
#define X86_OP_ENTRY1(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define cpuid(feat) .cpuid = X86_FEAT_##feat,
#define noseg .special = X86_SPECIAL_NoSeg,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,

#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

#define avx2_256 .vex_special = X86_VEX_AVX2_256,

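/*
 * Bitmask of mandatory SSE prefixes that an entry accepts; P_00 stands
 * for "no 66/F3/F2 prefix".  Note that P_00 uses bit 0 directly, while
 * the others reuse the PREFIX_* values as shift counts.
 */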
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,

#define UNKNOWN_OPCODE ((X86OpEntry) {})

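/*
 * Fetch the modrm byte lazily and cache it, so that the decode_*
 * callbacks below can all look at it without consuming the instruction
 * byte stream twice.
 */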
static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
{
    if (!s->has_modrm) {
        s->modrm = x86_ldub_code(env, s);
        s->has_modrm = true;
    }
    return s->modrm;
}

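/*
 * Select one of up to four entries based on the mandatory SSE prefix:
 * index 0 for no prefix, 1 for 66, 2 for F3, 3 for F2.  F2 is checked
 * first, so it wins when more than one prefix is present.
 */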
static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
{
    if (s->prefix & PREFIX_REPNZ) {
        return &entries[3];
    } else if (s->prefix & PREFIX_REPZ) {
        return &entries[2];
    } else if (s->prefix & PREFIX_DATA) {
        return &entries[1];
    } else {
        return &entries[0];
    }
}

static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
    static const X86OpEntry group15_reg[8] = {
    };

    static const X86OpEntry group15_mem[8] = {
        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128)),
        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128)),
    };

    uint8_t modrm = get_modrm(s, env);
    if ((modrm >> 6) == 3) {
        *entry = group15_reg[(modrm >> 3) & 7];
    } else {
        *entry = group15_mem[(modrm >> 3) & 7];
    }
}

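/*
 * Group 17 (VEX-encoded BMI1): the operation is selected by the reg field
 * of modrm.  Slots other than BLSR/BLSMSK/BLSI stay NULL and decode as
 * undefined.
 */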
static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86GenFunc group17_gen[8] = {
        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
    };
    int op = (get_modrm(s, env) >> 3) & 7;
    entry->gen = group17_gen[op];
}

static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group12[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group12[op];
}

static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group13[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group13[op];
}

static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group14[8] = {
        /* grp14 */
        {},
        {},
        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
        {},
        {},
        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group14[op];
}

static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F6F[4] = {
        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F6F);
}

static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry pshufw[4] = {
        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
    };

    *entry = *decode_by_prefix(s, pshufw);
}

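/*
 * 0F 77 is EMMS without a VEX prefix; with VEX it becomes VZEROUPPER
 * (VEX.L = 0) or VZEROALL (VEX.L = 1).
 */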
static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & PREFIX_VEX)) {
        entry->gen = gen_EMMS;
    } else if (!s->vex_l) {
        entry->gen = gen_VZEROUPPER;
        entry->vex_class = 8;
    } else {
        entry->gen = gen_VZEROALL;
        entry->vex_class = 8;
    }
}

static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F78[4] = {
        {},
        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
        {},
        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
    };
    *entry = *decode_by_prefix(s, opcodes_0F78);
}

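/*
 * 0F 79: register forms of the AMD SSE4A EXTRQ/INSERTQ instructions,
 * selected by the 66/F2 prefix; there is no valid no-prefix encoding.
 */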
static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (s->prefix & PREFIX_REPNZ) {
        entry->gen = gen_INSERTQ_r; /* AMD extension */
    } else if (s->prefix & PREFIX_DATA) {
        entry->gen = gen_EXTRQ_r; /* AMD extension */
    } else {
        entry->gen = NULL;
    }
}

static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7E[4] = {
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7E);
}

static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7F[4] = {
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7F);
}

static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry movq[4] = {
        {},
        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
    };

    *entry = *decode_by_prefix(s, movq);
}

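/*
 * Three-byte opcodes 0F 38 00-EF.  Opcodes F0-FF are dispatched through a
 * separate table below, because for those the 66/F3/F2 prefixes select
 * entirely different instructions rather than vector variants.
 */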
static const X86OpEntry opcodes_0F38_00toEF[240] = {
    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),

    /*
     * Source operand listed as Mq/Ux and similar in the manual; incorrectly
     * listed as 128-bit only in table 2-17.
     */
    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),

    /* Same as PMOVSX.  */
    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),

    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),

    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */

    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    /* Listed incorrectly as type 4 */
    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    /* Incorrectly listed as Mx,Hx,Vx in the manual */
    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),

    /* VPBROADCASTQ not listed as W0 in table 2-16 */
    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),

    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),

    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),

    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),

    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),

    /*
     * REG selects srcdest2 operand, VEX.vvvv selects src3.  VEX class not found
     * in manual, assumed to be 13 from the VEX.L0 constraint.
     */
    [0xe0] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe1] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe2] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe3] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe4] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe5] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe6] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe7] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),

    [0xe8] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe9] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xea] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xeb] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xec] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xed] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xee] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xef] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
};

/* five rows for no prefix, 66, F3, F2, 66+F2  */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
    [0] = {
        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
    },
    [1] = {
        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
    },
    [2] = {
        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [3] = {
        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [5] = {
        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI2)),
        {},
        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
    [6] = {
        {},
        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
        {},
    },
    [7] = {
        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
};

static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    *b = x86_ldub_code(env, s);
    if (*b < 0xf0) {
        *entry = opcodes_0F38_00toEF[*b];
    } else {
        int row = 0;
        if (s->prefix & PREFIX_REPZ) {
            /* The REPZ (F3) prefix has priority over 66 */
            row = 2;
        } else {
            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
            row += s->prefix & PREFIX_DATA ? 1 : 0;
        }
        *entry = opcodes_0F38_F0toFF[*b & 15][row];
    }
}

static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry
        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);

    int modrm = get_modrm(s, env);
    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
}

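/*
 * Three-byte opcodes 0F 3A xx.  In this range an immediate byte is part
 * of the encoding for almost every instruction, hence the frequent use
 * of X86_OP_ENTRY4 (which adds an implicit I,b fourth operand).
 */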
static const X86OpEntry opcodes_0F3A[256] = {
    /*
     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
     * only.
     */
    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),

    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),

    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) op2_Ry p_66),
    [0x21] = X86_OP_GROUP0(VINSERTPS),
    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),

    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),

    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),

    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    /*
     * Not listed as four operand in the manual.  Also writes and reads 128-bits
     * from the first two operands due to the V operand picking higher entries of
     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
     * value of vex_special, because the table lists the operand types of VSQRTPx.
     */
    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),

    /* Listed incorrectly as type 4 */
    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),

    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),

    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),

    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};

static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    *b = x86_ldub_code(env, s);
    *entry = opcodes_0F3A[*b];
}

/*
 * There are some mistakes in the operands in the manual, and the load/store/register
 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
 * efficiency of implementation rather than copying what the manual says.
 *
 * In particular:
 *
 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
 * but this is not mentioned in the tables.
 *
 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
 * quadword of the V operand.
 */
static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F10_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F10_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
    }
}

static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F11_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F11_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
    }
}

static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F12_mem[4] = {
        /*
         * Use dq for operand for compatibility with gen_MOVSD and
         * to allow VEX128 only.
         */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
    };
    static const X86OpEntry opcodes_0F12_reg[4] = {
        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
            entry->s2 = X86_SIZE_qq;
        }
    }
}

static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F16_mem[4] = {
        /*
         * Operand 1 technically only reads the low 64 bits, but uses dq so that
         * it is easier to check for op0 == op1 in an endianness-neutral manner.
         */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        {},
    };
    static const X86OpEntry opcodes_0F16_reg[4] = {
        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
        {},
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
    }
}

static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2A[4] = {
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2A);
}

static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2B[4] = {
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
        /* AMD extensions */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
    };

    *entry = *decode_by_prefix(s, opcodes_0F2B);
}

static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2C[4] = {
        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2C);
}

static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2D[4] = {
        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2D);
}

static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /*
     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
     */
    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
}

static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
        entry->op1 = X86_TYPE_None;
        entry->s1 = X86_SIZE_None;
    }
    switch (*b) {
    case 0x51: entry->gen = gen_VSQRT; break;
    case 0x52: entry->gen = gen_VRSQRT; break;
    case 0x53: entry->gen = gen_VRCP; break;
    }
}

static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5A[4] = {
        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
    };
    *entry = *decode_by_prefix(s, opcodes_0F5A);
}

static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5B[4] = {
        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F5B);
}

static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0FE6[4] = {
        {},
        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
    };
    *entry = *decode_by_prefix(s, opcodes_0FE6);
}

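/*
 * Two-byte opcode map (0F xx).  GROUP0 entries defer to the decode_0Fxx
 * helpers above; empty slots are not decoded through this table.
 */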
static const X86OpEntry opcodes_0F[256] = {
    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
    /*
     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
     * function.
     */
    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),

    [0x10] = X86_OP_GROUP0(0F10),
    [0x11] = X86_OP_GROUP0(0F11),
    [0x12] = X86_OP_GROUP0(0F12),
    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x16] = X86_OP_GROUP0(0F16),
    /* Incorrectly listed as Mq,Vq in the manual */
    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),

    [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x43] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x44] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x45] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x46] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x47] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),

    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */

    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0x70] = X86_OP_GROUP0(0F70),
    [0x71] = X86_OP_GROUP0(group12),
    [0x72] = X86_OP_GROUP0(group13),
    [0x73] = X86_OP_GROUP0(group14),
    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x77] = X86_OP_GROUP0(0F77),

    [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),

    [0x90] = X86_OP_ENTRYw(SETcc, E,b),
    [0x91] = X86_OP_ENTRYw(SETcc, E,b),
    [0x92] = X86_OP_ENTRYw(SETcc, E,b),
    [0x93] = X86_OP_ENTRYw(SETcc, E,b),
    [0x94] = X86_OP_ENTRYw(SETcc, E,b),
    [0x95] = X86_OP_ENTRYw(SETcc, E,b),
    [0x96] = X86_OP_ENTRYw(SETcc, E,b),
    [0x97] = X86_OP_ENTRYw(SETcc, E,b),

    [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
    [0xa1] = X86_OP_ENTRYw(POP, FS, w),

    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
    [0x2A] = X86_OP_GROUP0(0F2A),
    [0x2B] = X86_OP_GROUP0(0F2B),
    [0x2C] = X86_OP_GROUP0(0F2C),
    [0x2D] = X86_OP_GROUP0(0F2D),
    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */

    [0x38] = X86_OP_GROUP0(0F38),
    [0x3a] = X86_OP_GROUP0(0F3A),

    [0x48] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x49] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4a] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4b] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4c] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4d] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4e] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
    [0x4f] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),

    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5a] = X86_OP_GROUP0(0F5A),
    [0x5b] = X86_OP_GROUP0(0F5B),
    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),

    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
    [0x6f] = X86_OP_GROUP0(0F6F),

    [0x78] = X86_OP_GROUP0(0F78),
    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7e] = X86_OP_GROUP0(0F7E),
    [0x7f] = X86_OP_GROUP0(0F7F),

    [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
    [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),

    [0x98] = X86_OP_ENTRYw(SETcc, E,b),
    [0x99] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
    [0x9f] = X86_OP_ENTRYw(SETcc, E,b),

    [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
    [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
    [0xae] = X86_OP_GROUP0(group15),
1126    /*
1127     * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
1128     * to assume sextT0.  Multiplication is commutative anyway.
1129     */
1130    [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
1131
1132    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
1133    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
1134    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
1135    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
1136    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
1137
1138    [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
1139    [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
1140
1141    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
1142    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
1143    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
1144    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
1145    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
1146
1147    [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1148    [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1149    [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1150    [0xcb] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1151    [0xcc] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1152    [0xcd] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1153    [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1154    [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1155
1156    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
1157    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1158    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1159    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1160    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1161    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1162    [0xd6] = X86_OP_GROUP0(0FD6),
1163    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1164
1165    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1166    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1167    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1168    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1169    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1170    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1171    [0xe6] = X86_OP_GROUP0(0FE6),
1172    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1173
1174    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1175    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1176    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1177    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1178    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1179    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1180    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1181    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1182
1183    /* Incorrectly missing from 2-17 */
1184    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1185    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1186    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1187    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1188    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1189    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1190    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1191    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1192
1193    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1194    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1195    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1196    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1197    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1198    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1199    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1200    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1201
1202    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1203    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1204    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1205    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1206    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1207    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1208    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1209    /* 0xff = UD0 */
1210};
1211
1212static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1213{
1214    *entry = opcodes_0F[*b];
1215}
1216
1217static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1218{
1219    *b = x86_ldub_code(env, s);
1220    do_decode_0F(s, env, entry, b);
1221}
1222
1223static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1224{
1225    static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
1226    static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
1227    static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0);
1228    if (!CODE64(s)) {
1229        *entry = arpl;
1230    } else if (REX_W(s)) {
1231        *entry = movsxd;
1232    } else {
1233        *entry = mov;
1234    }
1235}
1236
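/*
 * Worked example (illustrative): in 64-bit mode, 48 63 c3 decodes as
 * movsxd rax, ebx -- REX.W selects the movsxd entry, which loads the
 * 32-bit source sign-extended into T0.  Without REX.W the same opcode
 * is a plain 32-bit mov, and outside 64-bit mode it is ARPL Ew, Gw,
 * which additionally requires protected mode (chk(prot)).
 */
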
1237static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1238{
1239    static const X86GenFunc group1_gen[8] = {
1240        gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB,
1241    };
1242    int op = (get_modrm(s, env) >> 3) & 7;
1243    entry->gen = group1_gen[op];
1244
1245    if (op == 7) {
1246        /* prevent writeback for CMP */
1247        entry->op1 = entry->op0;
1248        entry->op0 = X86_TYPE_None;
1249        entry->s0 = X86_SIZE_None;
1250    } else {
1251        entry->special = X86_SPECIAL_HasLock;
1252    }
1253}
1254
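/*
 * Worked example (illustrative): 83 f9 05 (cmp ecx, 5) has modrm 0xf9,
 * so op = 7 picks gen_SUB; turning op0 into a read-only op1 suppresses
 * the register/memory writeback, leaving only the flag update.  For
 * 83 c1 05 (add ecx, 5), op = 0 keeps the destination, and HasLock
 * permits a LOCK prefix when the destination is in memory.
 */
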
1255static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1256{
1257    int op = (get_modrm(s, env) >> 3) & 7;
1258    if (op != 0) {
1259        /* could be XOP prefix too */
1260        *entry = UNKNOWN_OPCODE;
1261    } else {
1262        entry->gen = gen_POP;
1263        /* The address must use the value of ESP after the pop.  */
1264        s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag);
1265    }
1266}
1267
1268static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1269{
1270    static const X86GenFunc group2_gen[8] = {
1271        gen_ROL, gen_ROR, gen_RCL, gen_RCR,
1272        gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
1273    };
1274    int op = (get_modrm(s, env) >> 3) & 7;
1275    entry->gen = group2_gen[op];
1276    if (op == 7) {
1277        entry->special = X86_SPECIAL_SExtT0;
1278    } else {
1279        entry->special = X86_SPECIAL_ZExtT0;
1280    }
1281}
1282
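/*
 * Worked example (illustrative): d1 f8 (sar eax, 1) has reg = 7 and
 * takes the SExtT0 path, so the value is sign-extended before the
 * arithmetic shift; d1 e8 (shr eax, 1) is zero-extended instead.
 * Encoding /6 maps to gen_SHL: it is the undocumented SAL alias.
 */
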
1283static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1284{
1285    static const X86OpEntry opcodes_grp3[16] = {
1286        /* 0xf6 */
1287        [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),
1288        [0x02] = X86_OP_ENTRY1(NOT,  E,b,      lock),
1289        [0x03] = X86_OP_ENTRY1(NEG,  E,b,      lock),
1290        [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
1291        [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
1292        [0x06] = X86_OP_ENTRYr(DIV,  E,b),
1293        [0x07] = X86_OP_ENTRYr(IDIV, E,b),
1294
1295        /* 0xf7 */
1296        [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),
1297        [0x0a] = X86_OP_ENTRY1(NOT,  E,v,      lock),
1298        [0x0b] = X86_OP_ENTRY1(NEG,  E,v,      lock),
1299        [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
1300        [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
1301        [0x0e] = X86_OP_ENTRYr(DIV,  E,v),
1302        [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
1303    };
1304
1305    int w = (*b & 1);
1306    int reg = (get_modrm(s, env) >> 3) & 7;
1307
1308    *entry = opcodes_grp3[(w << 3) | reg];
1309}
1310
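/*
 * Worked example (illustrative): f7 d8 (neg eax) has w = 1 and
 * reg = 3, selecting entry 0x0b; f6 d8 (neg al) maps to 0x03.  The
 * reg = 1 slots are left empty, so their zero-filled entries have a
 * NULL gen pointer and disas_insn reports an unknown opcode.
 */
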
1311static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1312{
1313    static const X86OpEntry opcodes_grp4_5[16] = {
1314        /* 0xfe */
1315        [0x00] = X86_OP_ENTRY1(INC,     E,b,                           lock),
1316        [0x01] = X86_OP_ENTRY1(DEC,     E,b,                           lock),
1317
1318        /* 0xff */
1319        [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
1320        [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
1321        [0x0a] = X86_OP_ENTRY3(CALL_m,  None, None, E,f64, None, None, zextT0),
1322        [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
1323        [0x0c] = X86_OP_ENTRY3(JMP_m,   None, None, E,f64, None, None, zextT0),
1324        [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
1325        [0x0e] = X86_OP_ENTRYr(PUSH,    E,f64),
1326    };
1327
1328    int w = (*b & 1);
1329    int reg = (get_modrm(s, env) >> 3) & 7;
1330
1331    *entry = opcodes_grp4_5[(w << 3) | reg];
1332}
1333
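/*
 * Worked example (illustrative): ff d0 (call rax) has w = 1 and
 * reg = 2, selecting the CALL_m entry at 0x0a; its f64 size ignores a
 * 0x66 override in 64-bit mode, where near call/jmp/push always
 * operate on 64 bits.
 */
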
1334
1335static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1336{
1337    int op = (get_modrm(s, env) >> 3) & 7;
1338    if (op != 0) {
1339        *entry = UNKNOWN_OPCODE;
1340    } else {
1341        entry->gen = gen_MOV;
1342    }
1343}
1344
1345static const X86OpEntry opcodes_root[256] = {
1346    [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
1347    [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
1348    [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
1349    [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
1350    [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock),   /* AL, Ib */
1351    [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock),   /* rAX, Iz */
1352    [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
1353    [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
1354
1355    [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
1356    [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
1357    [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
1358    [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
1359    [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock),   /* AL, Ib */
1360    [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock),   /* rAX, Iz */
1361    [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
1362    [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
1363
1364    [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
1365    [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
1366    [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
1367    [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
1368    [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock),   /* AL, Ib */
1369    [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock),   /* rAX, Iz */
1370    [0x26] = {},
1371    [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
1372
1373    [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
1374    [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
1375    [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
1376    [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
1377    [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock),   /* AL, Ib */
1378    [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock),   /* rAX, Iz */
1379    [0x36] = {},
1380    [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
1381
1382    [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
1383    [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
1384    [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
1385    [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
1386    [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
1387    [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
1388    [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
1389    [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
1390
1391    [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1392    [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1393    [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1394    [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1395    [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1396    [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1397    [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1398    [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1399
1400    [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
1401    [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
1402    [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
1403    [0x63] = X86_OP_GROUP0(63),
1404    [0x64] = {},
1405    [0x65] = {},
1406    [0x66] = {},
1407    [0x67] = {},
1408
1409    [0x70] = X86_OP_ENTRYr(Jcc, J,b),
1410    [0x71] = X86_OP_ENTRYr(Jcc, J,b),
1411    [0x72] = X86_OP_ENTRYr(Jcc, J,b),
1412    [0x73] = X86_OP_ENTRYr(Jcc, J,b),
1413    [0x74] = X86_OP_ENTRYr(Jcc, J,b),
1414    [0x75] = X86_OP_ENTRYr(Jcc, J,b),
1415    [0x76] = X86_OP_ENTRYr(Jcc, J,b),
1416    [0x77] = X86_OP_ENTRYr(Jcc, J,b),
1417
1418    [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
1419    [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
1420    [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
1421    [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
1422    [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),
1423    [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),
1424    [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
1425    [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
1426
1427    [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1428    [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1429    [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1430    [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1431    [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1432    [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1433    [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1434    [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1435
1436    [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
1437    [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
1438    [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
1439    [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
1440    [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
1441    [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
1442    [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
1443    [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
1444
1445    [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1446    [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1447    [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1448    [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1449    [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1450    [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1451    [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1452    [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1453
1454    [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
1455    [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
1456    [0xC2] = X86_OP_ENTRYr(RET, I,w),
1457    [0xC3] = X86_OP_ENTRY0(RET),
1458    [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)),
1459    [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, chk(i64)),
1460    [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
1461    [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
1462
1463    [0xD0] = X86_OP_GROUP1(group2, E,b),
1464    [0xD1] = X86_OP_GROUP1(group2, E,v),
1465    [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
1466    [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
1467    [0xD4] = X86_OP_ENTRYr(AAM, I,b),
1468    [0xD5] = X86_OP_ENTRYr(AAD, I,b),
1469    [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
1470    [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
1471
1472    [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
1473    [0xE1] = X86_OP_ENTRYr(LOOPE,  J,b), /* implicit: CX with aflag size */
1474    [0xE2] = X86_OP_ENTRYr(LOOP,   J,b), /* implicit: CX with aflag size */
1475    [0xE3] = X86_OP_ENTRYr(JCXZ,   J,b), /* implicit: CX with aflag size */
1476    [0xE4] = X86_OP_ENTRYwr(IN,    0,b, I_unsigned,b), /* AL */
1477    [0xE5] = X86_OP_ENTRYwr(IN,    0,v, I_unsigned,b), /* AX/EAX */
1478    [0xE6] = X86_OP_ENTRYrr(OUT,   0,b, I_unsigned,b), /* AL */
1479    [0xE7] = X86_OP_ENTRYrr(OUT,   0,v, I_unsigned,b), /* AX/EAX */
1480
1481    [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
1482    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0)),
1483    [0xF5] = X86_OP_ENTRY0(CMC),
1484    [0xF6] = X86_OP_GROUP1(group3, E,b),
1485    [0xF7] = X86_OP_GROUP1(group3, E,v),
1486
1487    [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
1488    [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
1489    [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
1490    [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
1491    [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock),   /* AL, Ib */
1492    [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock),   /* rAX, Iz */
1493    [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
1494    [0x0F] = X86_OP_GROUP0(0F),
1495
1496    [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
1497    [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
1498    [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
1499    [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
1500    [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock),   /* AL, Ib */
1501    [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock),   /* rAX, Iz */
1502    [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
1503    [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
1504
1505    [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
1506    [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
1507    [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
1508    [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
1509    [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock),   /* AL, Ib */
1510    [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock),   /* rAX, Iz */
1511    [0x2E] = {},
1512    [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
1513
1514    [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
1515    [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
1516    [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
1517    [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
1518    [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b),   /* AL, Ib */
1519    [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z),   /* rAX, Iz */
1520    [0x3E] = {},
1521    [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
1522
1523    [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
1524    [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
1525    [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
1526    [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
1527    [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
1528    [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
1529    [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
1530    [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
1531
1532    [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
1533    [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
1534    [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
1535    [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
1536    [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
1537    [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
1538    [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
1539    [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
1540
1541    [0x68] = X86_OP_ENTRYr(PUSH, I,z),
1542    [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
1543    [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
1544    [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
1545    [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
1546    [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
1547    [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
1548    [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
1549
1550    [0x78] = X86_OP_ENTRYr(Jcc, J,b),
1551    [0x79] = X86_OP_ENTRYr(Jcc, J,b),
1552    [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
1553    [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
1554    [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
1555    [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
1556    [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
1557    [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
1558
1559    [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
1560    [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
1561    [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
1562    [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
1563    [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None),
1564    [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
1565    [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None),
1566    [0x8F] = X86_OP_GROUPw(group1A, E,v),
1567
1568    [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
1569    [0x99] = X86_OP_ENTRY3(CWD,    2,v, 0,v, None, None), /* rDX, rAX */
1570    [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
1571    [0x9B] = X86_OP_ENTRY0(WAIT),
1572    [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
1573    [0x9D] = X86_OP_ENTRY0(POPF,   chk(vm86_iopl) svm(POPF)),
1574    [0x9E] = X86_OP_ENTRY0(SAHF),
1575    [0x9F] = X86_OP_ENTRY0(LAHF),
1576
1577    [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* AL, Ib */
1578    [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* rAX, Iz */
1579    [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
1580    [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
1581    /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
1582    [0xAC] = X86_OP_ENTRYr(LODS, X,b),
1583    [0xAD] = X86_OP_ENTRYr(LODS, X,v),
1584    [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
1585    [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
1586
1587    [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1588    [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1589    [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1590    [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1591    [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1592    [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1593    [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1594    [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1595
1596    [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
1597    [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
1598    [0xCA] = X86_OP_ENTRYr(RETF,  I,w),
1599    [0xCB] = X86_OP_ENTRY0(RETF),
1600    [0xCC] = X86_OP_ENTRY0(INT3),
1601    [0xCD] = X86_OP_ENTRYr(INT, I,b,  chk(vm86_iopl)),
1602    [0xCE] = X86_OP_ENTRY0(INTO),
1603    [0xCF] = X86_OP_ENTRY0(IRET,      chk(vm86_iopl) svm(IRET)),
1604
1605    [0xE8] = X86_OP_ENTRYr(CALL,   J,z_f64),
1606    [0xE9] = X86_OP_ENTRYr(JMP,    J,z_f64),
1607    [0xEA] = X86_OP_ENTRYrr(JMPF,  I_unsigned,p, I_unsigned,w, chk(i64)),
1608    [0xEB] = X86_OP_ENTRYr(JMP,    J,b),
1609    [0xEC] = X86_OP_ENTRYwr(IN,    0,b, 2,w), /* AL, DX */
1610    [0xED] = X86_OP_ENTRYwr(IN,    0,v, 2,w), /* AX/EAX, DX */
1611    [0xEE] = X86_OP_ENTRYrr(OUT,   0,b, 2,w), /* DX, AL */
1612    [0xEF] = X86_OP_ENTRYrr(OUT,   0,v, 2,w), /* DX, AX/EAX */
1613
1614    [0xF8] = X86_OP_ENTRY0(CLC),
1615    [0xF9] = X86_OP_ENTRY0(STC),
1616    [0xFA] = X86_OP_ENTRY0(CLI,    chk(iopl)),
1617    [0xFB] = X86_OP_ENTRY0(STI,    chk(iopl)),
1618    [0xFC] = X86_OP_ENTRY0(CLD),
1619    [0xFD] = X86_OP_ENTRY0(STD),
1620    [0xFE] = X86_OP_GROUP1(group4_5, E,b),
1621    [0xFF] = X86_OP_GROUP1(group4_5, E,v),
1622};
1623
1624#undef mmx
1625#undef vex1
1626#undef vex2
1627#undef vex3
1628#undef vex4
1629#undef vex4_unal
1630#undef vex5
1631#undef vex6
1632#undef vex7
1633#undef vex8
1634#undef vex11
1635#undef vex12
1636#undef vex13
1637
1638/*
1639 * Decode the fixed part of the opcode and place the last
1640 * byte of the opcode in b.
1641 */
1642static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1643{
1644    *entry = opcodes_root[*b];
1645}
1646
1647
1648static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1649                        X86DecodedOp *op, X86OpType type)
1650{
1651    int modrm = get_modrm(s, env);
1652    if ((modrm >> 6) == 3) {
1653        op->n = (modrm & 7);
1654        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1655            op->n |= REX_B(s);
1656        }
1657    } else {
1658        op->has_ea = true;
1659        op->n = -1;
1660        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1661    }
1662    return modrm;
1663}
1664
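/*
 * Sketch (illustrative only, not used by the decoder): the field split
 * applied above.  For modrm 0x44, mod = 1, reg = 0 and rm = 4; rm = 4
 * with mod != 3 is the SIB escape that gen_lea_modrm_0 resolves
 * together with the 8-bit displacement.  For modrm 0xc3, mod = 3 names
 * register 3 directly, extended to r11 by REX.B.
 */
static inline void example_split_modrm(uint8_t modrm,
                                       int *mod, int *reg, int *rm)
{
    *mod = modrm >> 6;        /* 2 bits, 3 means register operand */
    *reg = (modrm >> 3) & 7;  /* 3 bits, register or opcode extension */
    *rm  = modrm & 7;         /* 3 bits, register or memory base */
}
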
1665static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1666{
1667    switch (size) {
1668    case X86_SIZE_b:  /* byte */
1669        *ot = MO_8;
1670        return true;
1671
1672    case X86_SIZE_d:  /* 32-bit */
1673    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1674        *ot = MO_32;
1675        return true;
1676
1677    case X86_SIZE_p:  /* Far pointer, return offset size */
1678    case X86_SIZE_s:  /* Descriptor, return offset size */
1679    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1680        *ot = s->dflag;
1681        return true;
1682
1683    case X86_SIZE_pi: /* MMX */
1684    case X86_SIZE_q:  /* 64-bit */
1685    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1686        *ot = MO_64;
1687        return true;
1688
1689    case X86_SIZE_w:  /* 16-bit */
1690        *ot = MO_16;
1691        return true;
1692
1693    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1694        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1695        return true;
1696
1697    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1698        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1699        return true;
1700
1701    case X86_SIZE_z_f64:  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
1702        *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
1703        return true;
1704
1705    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1706        if (e->special == X86_SPECIAL_MMX &&
1707            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1708            *ot = MO_64;
1709            return true;
1710        }
1711        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1712            return false;
1713        }
1714        *ot = MO_128;
1715        return true;
1716
1717    case X86_SIZE_qq: /* AVX 256-bit */
1718        if (!s->vex_l) {
1719            return false;
1720        }
1721        *ot = MO_256;
1722        return true;
1723
1724    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1725        if (e->special == X86_SPECIAL_MMX &&
1726            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1727            *ot = MO_64;
1728            return true;
1729        }
1730        /* fall through */
1731    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1732    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1733        *ot = s->vex_l ? MO_256 : MO_128;
1734        return true;
1735
1736    case X86_SIZE_xh: /* SSE/AVX packed half register */
1737        *ot = s->vex_l ? MO_128 : MO_64;
1738        return true;
1739
1740    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1741        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1742        return true;
1743
1744    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1745        *ot = CODE64(s) ? MO_64 : s->dflag;
1746        return true;
1747
1748    default:
1749        *ot = -1;
1750        return true;
1751    }
1752}
1753
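/*
 * Worked example (illustrative): in 64-bit mode "v" follows s->dflag,
 * so b8 (mov eax, imm32) is MO_32, 66 b8 is MO_16 and 48 b8
 * (mov rax, imm64) is MO_64 -- the one case where I,v produces a full
 * 8-byte immediate.  "z" saturates at MO_32, which is why ALU
 * immediates such as 48 05 (add rax, Iz) stay 4 bytes wide and are
 * sign-extended instead.
 */
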
1754static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1755                      X86DecodedOp *op, X86OpType type, int b)
1756{
1757    int modrm;
1758
1759    switch (type) {
1760    case X86_TYPE_None:  /* Implicit or absent */
1761    case X86_TYPE_A:  /* Implicit */
1762    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1763    case X86_TYPE_X:  /* string source */
1764    case X86_TYPE_Y:  /* string destination */
1765        break;
1766
1767    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1768        op->unit = X86_OP_INT;
1769        op->n = s->vex_v;
1770        break;
1771
1772    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1773        op->unit = X86_OP_CR;
1774        goto get_reg;
1775
1776    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1777        op->unit = X86_OP_DR;
1778        goto get_reg;
1779
1780    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1781        op->unit = X86_OP_INT;
1782        goto get_reg;
1783
1784    case X86_TYPE_S:  /* reg selects a segment register */
1785        op->unit = X86_OP_SEG;
1786        goto get_reg;
1787
1788    case X86_TYPE_P:
1789        op->unit = X86_OP_MMX;
1790        goto get_reg;
1791
1792    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1793        if (decode->e.special == X86_SPECIAL_MMX &&
1794            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1795            op->unit = X86_OP_MMX;
1796        } else {
1797            op->unit = X86_OP_SSE;
1798        }
1799    get_reg:
1800        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1801        break;
1802
1803    case X86_TYPE_E:  /* ALU modrm operand */
1804        op->unit = X86_OP_INT;
1805        goto get_modrm;
1806
1807    case X86_TYPE_Q:  /* MMX modrm operand */
1808        op->unit = X86_OP_MMX;
1809        goto get_modrm;
1810
1811    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1812        if (decode->e.special == X86_SPECIAL_MMX &&
1813            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1814            op->unit = X86_OP_MMX;
1815        } else {
1816            op->unit = X86_OP_SSE;
1817        }
1818        goto get_modrm;
1819
1820    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1821        op->unit = X86_OP_MMX;
1822        goto get_modrm_reg;
1823
1824    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1825        if (decode->e.special == X86_SPECIAL_MMX &&
1826            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1827            op->unit = X86_OP_MMX;
1828        } else {
1829            op->unit = X86_OP_SSE;
1830        }
1831        goto get_modrm_reg;
1832
1833    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1834        op->unit = X86_OP_INT;
1835    get_modrm_reg:
1836        modrm = get_modrm(s, env);
1837        if ((modrm >> 6) != 3) {
1838            return false;
1839        }
1840        goto get_modrm;
1841
1842    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1843        op->unit = X86_OP_SSE;
1844        goto get_modrm_mem;
1845
1846    case X86_TYPE_EM:  /* modrm byte selects an ALU memory operand */
1847        op->unit = X86_OP_INT;
1848        /* fall through */
1849    case X86_TYPE_M:  /* modrm byte selects a memory operand */
1850    get_modrm_mem:
1851        modrm = get_modrm(s, env);
1852        if ((modrm >> 6) == 3) {
1853            return false;
1854        }
1855    get_modrm:
1856        decode_modrm(s, env, decode, op, type);
1857        break;
1858
1859    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1860        op->unit = X86_OP_INT;
1861        op->has_ea = true;
1862        op->n = -1;
1863        decode->mem = (AddressParts) {
1864            .def_seg = R_DS,
1865            .base = -1,
1866            .index = -1,
1867            .disp = insn_get_addr(env, s, s->aflag)
1868        };
1869        break;
1870
1871    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1872        if ((s->prefix & PREFIX_VEX)) {
1873            op->unit = X86_OP_SSE;
1874            op->n = s->vex_v;
1875            break;
1876        }
1877        if (op == &decode->op[0]) {
1878            /* shifts place the destination in VEX.vvvv, use modrm */
1879            return decode_op(s, env, decode, op, decode->e.op1, b);
1880        } else {
1881            return decode_op(s, env, decode, op, decode->e.op0, b);
1882        }
1883
1884    case X86_TYPE_I:  /* Immediate */
1885    case X86_TYPE_J:  /* Relative offset for a jump */
1886        op->unit = X86_OP_IMM;
1887        decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
1888        break;
1889
1890    case X86_TYPE_I_unsigned:  /* Immediate */
1891        op->unit = X86_OP_IMM;
1892        decode->immediate = op->imm = insn_get(env, s, op->ot);
1893        break;
1894
1895    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1896        op->n = insn_get(env, s, op->ot) >> 4;
1897        break;
1898
1899    case X86_TYPE_2op:
1900        *op = decode->op[0];
1901        break;
1902
1903    case X86_TYPE_LoBits:
1904        op->n = (b & 7) | REX_B(s);
1905        op->unit = X86_OP_INT;
1906        break;
1907
1908    case X86_TYPE_0 ... X86_TYPE_7:
1909        op->n = type - X86_TYPE_0;
1910        op->unit = X86_OP_INT;
1911        break;
1912
1913    case X86_TYPE_ES ... X86_TYPE_GS:
1914        op->n = type - X86_TYPE_ES;
1915        op->unit = X86_OP_SEG;
1916        break;
1917    }
1918
1919    return true;
1920}
1921
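/*
 * Worked example (illustrative): c5 f1 fe c2 (vpaddd xmm0, xmm1, xmm2)
 * fills V from modrm.reg (0), H from the inverted VEX.vvvv field (1)
 * and W from modrm.rm (2).  For the same opcode without a VEX prefix
 * (66 0f fe /r, paddd), the H case above falls back to the modrm
 * operand, so one table entry serves both the SSE and AVX forms.
 */
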
1922static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1923{
1924    uint16_t sse_prefixes;
1925
1926    if (!e->valid_prefix) {
1927        return true;
1928    }
1929    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1930        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1931        s->prefix &= ~PREFIX_DATA;
1932    }
1933
1934    /* Now, either zero or one bit is set in sse_prefixes.  */
1935    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1936    return e->valid_prefix & (1 << sse_prefixes);
1937}
1938
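/*
 * Worked example (illustrative): PADDD is declared p_00_66, so plain
 * 0f fe and 66 0f fe pass, while f3 0f fe fails because the REPZ bit
 * has no counterpart in valid_prefix.  66 f3 0f fe fails the same way:
 * F3 first cancels the 66 prefix, then the REPZ test rejects it.
 */
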
1939static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1940                        X86DecodedInsn *decode)
1941{
1942    X86OpEntry *e = &decode->e;
1943
1944    decode_func(s, env, e, &decode->b);
1945    while (e->is_decode) {
1946        e->is_decode = false;
1947        e->decode(s, env, e, &decode->b);
1948    }
1949
1950    if (!validate_sse_prefix(s, e)) {
1951        return false;
1952    }
1953
1954    /* First compute size of operands in order to initialize s->rip_offset.  */
1955    if (e->op0 != X86_TYPE_None) {
1956        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1957            return false;
1958        }
1959        if (e->op0 == X86_TYPE_I) {
1960            s->rip_offset += 1 << decode->op[0].ot;
1961        }
1962    }
1963    if (e->op1 != X86_TYPE_None) {
1964        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1965            return false;
1966        }
1967        if (e->op1 == X86_TYPE_I) {
1968            s->rip_offset += 1 << decode->op[1].ot;
1969        }
1970    }
1971    if (e->op2 != X86_TYPE_None) {
1972        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1973            return false;
1974        }
1975        if (e->op2 == X86_TYPE_I) {
1976            s->rip_offset += 1 << decode->op[2].ot;
1977        }
1978    }
1979    if (e->op3 != X86_TYPE_None) {
1980        /*
1981         * A couple instructions actually use the extra immediate byte for an Lx
1982         * register operand; those are handled in the gen_* functions as one-offs.
1983         */
1984        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1985        s->rip_offset += 1;
1986    }
1987
1988    if (e->op0 != X86_TYPE_None &&
1989        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1990        return false;
1991    }
1992
1993    if (e->op1 != X86_TYPE_None &&
1994        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1995        return false;
1996    }
1997
1998    if (e->op2 != X86_TYPE_None &&
1999        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
2000        return false;
2001    }
2002
2003    if (e->op3 != X86_TYPE_None) {
2004        decode->immediate = insn_get_signed(env, s, MO_8);
2005    }
2006
2007    return true;
2008}
2009
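/*
 * Worked example (illustrative): computing the immediate sizes first
 * matters for RIP-relative operands.  In 81 05 <disp32> <imm32>
 * (add dword ptr [rip+disp], imm), the Iz operand adds 4 to
 * s->rip_offset before the memory operand is decoded, so the
 * displacement is taken relative to the end of the instruction,
 * including the trailing immediate bytes.
 */
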
2010static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
2011{
2012    switch (cpuid) {
2013    case X86_FEAT_None:
2014        return true;
2015    case X86_FEAT_CMOV:
2016        return (s->cpuid_features & CPUID_CMOV);
2017    case X86_FEAT_F16C:
2018        return (s->cpuid_ext_features & CPUID_EXT_F16C);
2019    case X86_FEAT_FMA:
2020        return (s->cpuid_ext_features & CPUID_EXT_FMA);
2021    case X86_FEAT_MOVBE:
2022        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
2023    case X86_FEAT_PCLMULQDQ:
2024        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
2025    case X86_FEAT_SSE:
2026        return (s->cpuid_features & CPUID_SSE);
2027    case X86_FEAT_SSE2:
2028        return (s->cpuid_features & CPUID_SSE2);
2029    case X86_FEAT_SSE3:
2030        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
2031    case X86_FEAT_SSSE3:
2032        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
2033    case X86_FEAT_SSE41:
2034        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
2035    case X86_FEAT_SSE42:
2036        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
2037    case X86_FEAT_AES:
2038        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
2039            return false;
2040        } else if (!(s->prefix & PREFIX_VEX)) {
2041            return true;
2042        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
2043            return false;
2044        } else {
2045            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
2046        }
2047
2048    case X86_FEAT_AVX:
2049        return (s->cpuid_ext_features & CPUID_EXT_AVX);
2050
2051    case X86_FEAT_3DNOW:
2052        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
2053    case X86_FEAT_SSE4A:
2054        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
2055
2056    case X86_FEAT_ADX:
2057        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
2058    case X86_FEAT_BMI1:
2059        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
2060    case X86_FEAT_BMI2:
2061        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
2062    case X86_FEAT_AVX2:
2063        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
2064    case X86_FEAT_SHA_NI:
2065        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
2066
2067    case X86_FEAT_CMPCCXADD:
2068        return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
2069    }
2070    g_assert_not_reached();
2071}
2072
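/*
 * Illustration of the X86_FEAT_AES chain above: aesenc (66 0f 38 dc)
 * only needs AES; vaesenc with xmm operands also needs AVX; and the
 * ymm form is accepted only when VAES is present as well.
 */
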
2073static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
2074{
2075    X86OpEntry *e = &decode->e;
2076
2077    switch (e->vex_special) {
2078    case X86_VEX_REPScalar:
2079        /*
2080         * Instructions which differ between 00/66 and F2/F3 in the
2081         * exception classification and the size of the memory operand.
2082         */
2083        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
2084        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2085            e->vex_class = e->vex_class < 4 ? 3 : 5;
2086            if (s->vex_l) {
2087                goto illegal;
2088            }
2089            assert(decode->e.s2 == X86_SIZE_x);
2090            if (decode->op[2].has_ea) {
2091                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
2092            }
2093        }
2094        break;
2095
2096    case X86_VEX_SSEUnaligned:
2097        /* handled in sse_needs_alignment.  */
2098        break;
2099
2100    case X86_VEX_AVX2_256:
2101        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
2102            goto illegal;
2103        }
2104    }
2105
2106    switch (e->vex_class) {
2107    case 0:
2108        if (s->prefix & PREFIX_VEX) {
2109            goto illegal;
2110        }
2111        return true;
2112    case 1:
2113    case 2:
2114    case 3:
2115    case 4:
2116    case 5:
2117    case 7:
2118        if (s->prefix & PREFIX_VEX) {
2119            if (!(s->flags & HF_AVX_EN_MASK)) {
2120                goto illegal;
2121            }
2122        } else if (e->special != X86_SPECIAL_MMX ||
2123                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2124            if (!(s->flags & HF_OSFXSR_MASK)) {
2125                goto illegal;
2126            }
2127        }
2128        break;
2129    case 12:
2130        /* Must have a VSIB byte and no address prefix.  */
2131        assert(s->has_modrm);
2132        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
2133            goto illegal;
2134        }
2135
2136        /* Check no overlap between registers.  */
2137        if (!decode->op[0].has_ea &&
2138            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
2139            goto illegal;
2140        }
2141        assert(!decode->op[1].has_ea);
2142        if (decode->op[1].n == decode->mem.index) {
2143            goto illegal;
2144        }
2145        if (!decode->op[2].has_ea &&
2146            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
2147            goto illegal;
2148        }
2149        /* fall through */
2150    case 6:
2151    case 11:
2152        if (!(s->prefix & PREFIX_VEX)) {
2153            goto illegal;
2154        }
2155        if (!(s->flags & HF_AVX_EN_MASK)) {
2156            goto illegal;
2157        }
2158        break;
2159    case 8:
2160        /* Non-VEX case handled in decode_0F77.  */
2161        assert(s->prefix & PREFIX_VEX);
2162        if (!(s->flags & HF_AVX_EN_MASK)) {
2163            goto illegal;
2164        }
2165        break;
2166    case 13:
2167        if (!(s->prefix & PREFIX_VEX)) {
2168            goto illegal;
2169        }
2170        if (s->vex_l) {
2171            goto illegal;
2172        }
2173        /* All integer instructions use VEX.vvvv, so exit.  */
2174        return true;
2175    }
2176
2177    if (s->vex_v != 0 &&
2178        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
2179        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
2180        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
2181        goto illegal;
2182    }
2183
2184    if (s->flags & HF_TS_MASK) {
2185        goto nm_exception;
2186    }
2187    if (s->flags & HF_EM_MASK) {
2188        goto illegal;
2189    }
2190
2191    if (e->check) {
2192        if (e->check & X86_CHECK_VEX128) {
2193            if (s->vex_l) {
2194                goto illegal;
2195            }
2196        }
2197        if (e->check & X86_CHECK_W0) {
2198            if (s->vex_w) {
2199                goto illegal;
2200            }
2201        }
2202        if (e->check & X86_CHECK_W1) {
2203            if (!s->vex_w) {
2204                goto illegal;
2205            }
2206        }
2207    }
2208    return true;
2209
2210nm_exception:
2211    gen_NM_exception(s);
2212    return false;
2213illegal:
2214    gen_illegal_opcode(s);
2215    return false;
2216}
2217
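/*
 * Worked example (illustrative) for the class-12 checks above: a
 * gather such as vgatherdps xmm1, [rax + xmm1], xmm2 is rejected
 * because the destination equals the VSIB index register; the SDM
 * makes any overlap between destination, index and mask #UD.
 */
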
2218/*
2219 * Convert one instruction. s->base.is_jmp is set if the translation must
2220 * be stopped.
2221 */
2222static void disas_insn(DisasContext *s, CPUState *cpu)
2223{
2224    CPUX86State *env = cpu_env(cpu);
2225    X86DecodedInsn decode;
2226    X86DecodeFunc decode_func = decode_root;
2227    uint8_t cc_live, b;
2228
2229    s->pc = s->base.pc_next;
2230    s->override = -1;
2231    s->popl_esp_hack = 0;
2232#ifdef TARGET_X86_64
2233    s->rex_r = 0;
2234    s->rex_x = 0;
2235    s->rex_b = 0;
2236#endif
2237    s->rip_offset = 0; /* for relative ip address */
2238    s->vex_l = 0;
2239    s->vex_v = 0;
2240    s->vex_w = false;
2241    s->has_modrm = false;
2242    s->prefix = 0;
2243
2244 next_byte:
2245    b = x86_ldub_code(env, s);
2246
2247    /* Collect prefixes.  */
2248    switch (b) {
2249    case 0xf3:
2250        s->prefix |= PREFIX_REPZ;
2251        s->prefix &= ~PREFIX_REPNZ;
2252        goto next_byte;
2253    case 0xf2:
2254        s->prefix |= PREFIX_REPNZ;
2255        s->prefix &= ~PREFIX_REPZ;
2256        goto next_byte;
2257    case 0xf0:
2258        s->prefix |= PREFIX_LOCK;
2259        goto next_byte;
2260    case 0x2e:
2261        s->override = R_CS;
2262        goto next_byte;
2263    case 0x36:
2264        s->override = R_SS;
2265        goto next_byte;
2266    case 0x3e:
2267        s->override = R_DS;
2268        goto next_byte;
2269    case 0x26:
2270        s->override = R_ES;
2271        goto next_byte;
2272    case 0x64:
2273        s->override = R_FS;
2274        goto next_byte;
2275    case 0x65:
2276        s->override = R_GS;
2277        goto next_byte;
2278    case 0x66:
2279        s->prefix |= PREFIX_DATA;
2280        goto next_byte;
2281    case 0x67:
2282        s->prefix |= PREFIX_ADR;
2283        goto next_byte;
2284#ifdef TARGET_X86_64
2285    case 0x40 ... 0x4f:
2286        if (CODE64(s)) {
2287            /* REX prefix */
2288            s->prefix |= PREFIX_REX;
2289            s->vex_w = (b >> 3) & 1;
2290            s->rex_r = (b & 0x4) << 1;
2291            s->rex_x = (b & 0x2) << 2;
2292            s->rex_b = (b & 0x1) << 3;
2293            goto next_byte;
2294        }
2295        break;
2296#endif
2297    case 0xc5: /* 2-byte VEX */
2298    case 0xc4: /* 3-byte VEX */
2299        /*
2300         * VEX prefixes cannot be used except in 32-bit mode.
2301         * Otherwise the instruction is LES or LDS.
2302         */
2303        if (CODE32(s) && !VM86(s)) {
2304            static const int pp_prefix[4] = {
2305                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
2306            };
2307            int vex3, vex2 = x86_ldub_code(env, s);
2308
2309            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
2310                /*
2311                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
2312                 * otherwise the instruction is LES or LDS.
2313                 */
2314                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
2315                break;
2316            }
2317
2318            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
2319            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
2320                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
2321                goto illegal_op;
2322            }
2323#ifdef TARGET_X86_64
2324            s->rex_r = (~vex2 >> 4) & 8;
2325#endif
2326            if (b == 0xc5) {
2327                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
2328                vex3 = vex2;
2329                decode_func = decode_0F;
2330            } else {
2331                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
2332                vex3 = x86_ldub_code(env, s);
2333#ifdef TARGET_X86_64
2334                s->rex_x = (~vex2 >> 3) & 8;
2335                s->rex_b = (~vex2 >> 2) & 8;
2336#endif
2337                s->vex_w = (vex3 >> 7) & 1;
2338                switch (vex2 & 0x1f) {
2339                case 0x01: /* Implied 0f leading opcode bytes.  */
2340                    decode_func = decode_0F;
2341                    break;
2342                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
2343                    decode_func = decode_0F38;
2344                    break;
2345                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
2346                    decode_func = decode_0F3A;
2347                    break;
2348                default:   /* Reserved for future use.  */
2349                    goto unknown_op;
2350                }
2351            }
2352            s->vex_v = (~vex3 >> 3) & 0xf;
2353            s->vex_l = (vex3 >> 2) & 1;
2354            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
2355        }
2356        break;
2357    default:
2358        break;
2359    }
2360
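    /*
     * Worked example (illustrative): c4 e2 79 18 /r is parsed here as
     * VEX.128.66.0F38.W0 18 (vbroadcastss): vex2 = 0xe2 has the
     * inverted R/X/B bits all clear and mmmmm = 2, selecting
     * decode_0F38; vex3 = 0x79 gives W = 0, vvvv = 0 after inversion,
     * L = 0 (128-bit) and pp = 1, which stands in for a 66 prefix.
     */
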
2361    /* Post-process prefixes.  */
2362    if (CODE64(s)) {
2363        /*
2364         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
2365         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
2366         * over 0x66 if both are present.
2367         */
2368        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
2369        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
2370        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
2371    } else {
2372        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
2373        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
2374            s->dflag = MO_32;
2375        } else {
2376            s->dflag = MO_16;
2377        }
2378        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
2379        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
2380            s->aflag = MO_32;
2381        } else {
2382            s->aflag = MO_16;
2383        }
2384    }
2385
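    /*
     * Worked example (illustrative): in 64-bit mode, 66 48 05 ... still
     * resolves to dflag = MO_64 because REX.W beats the 0x66 prefix,
     * while 66 05 ... gives MO_16 and a bare 05 gives MO_32.  aflag
     * never drops below MO_32 in long mode: 67 selects 32-bit, not
     * 16-bit, addressing.
     */
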
2386    /* Go back to old decoder for unconverted opcodes.  */
2387    if (!(s->prefix & PREFIX_VEX)) {
2388        if ((b & ~7) == 0xd8) {
2389            if (!disas_insn_x87(s, cpu, b)) {
2390                goto unknown_op;
2391            }
2392            return;
2393        }
2394
2395        if (b == 0x0f) {
2396            b = x86_ldub_code(env, s);
2397            switch (b) {
2398            case 0x00 ... 0x03: /* mostly privileged instructions */
2399            case 0x05 ... 0x09:
2400            case 0x0d:          /* 3DNow! prefetch */
2401            case 0x18 ... 0x23: /* prefetch, MPX, mov from/to CR and DR */
2402            case 0x30 ... 0x35: /* more privileged instructions */
2403            case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
2404            case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
2405            case 0xb0 ... 0xb1: /* cmpxchg */
2406            case 0xb3:          /* btr */
2407            case 0xb8:          /* integer ops */
2408            case 0xba ... 0xbd: /* integer ops */
2409            case 0xc0 ... 0xc1: /* xadd */
2410            case 0xc7:          /* grp9 */
2411                disas_insn_old(s, cpu, b + 0x100);
2412                return;
2413            default:
2414                decode_func = do_decode_0F;
2415                break;
2416            }
2417        }
2418    }
2419
2420    memset(&decode, 0, sizeof(decode));
2421    decode.cc_op = -1;
2422    decode.b = b;
2423    if (!decode_insn(s, env, decode_func, &decode)) {
2424        goto illegal_op;
2425    }
2426    if (!decode.e.gen) {
2427        goto unknown_op;
2428    }
2429
2430    if (!has_cpuid_feature(s, decode.e.cpuid)) {
2431        goto illegal_op;
2432    }
2433
2434    /* Checks that result in #UD come first.  */
2435    if (decode.e.check) {
2436        if (decode.e.check & X86_CHECK_i64) {
2437            if (CODE64(s)) {
2438                goto illegal_op;
2439            }
2440        }
2441        if (decode.e.check & X86_CHECK_o64) {
2442            if (!CODE64(s)) {
2443                goto illegal_op;
2444            }
2445        }
2446        if (decode.e.check & X86_CHECK_prot) {
2447            if (!PE(s) || VM86(s)) {
2448                goto illegal_op;
2449            }
2450        }
2451    }
2452
2453    switch (decode.e.special) {
2454    case X86_SPECIAL_None:
2455        break;
2456
2457    case X86_SPECIAL_Locked:
2458        if (decode.op[0].has_ea) {
2459            s->prefix |= PREFIX_LOCK;
2460        }
2461        decode.e.special = X86_SPECIAL_HasLock;
2462        /* fallthrough */
2463    case X86_SPECIAL_HasLock:
2464        break;
2465
2466    case X86_SPECIAL_Op0_Rd:
2467        assert(decode.op[0].unit == X86_OP_INT);
2468        if (!decode.op[0].has_ea) {
2469            decode.op[0].ot = MO_32;
2470        }
2471        break;
2472
2473    case X86_SPECIAL_Op2_Ry:
2474        assert(decode.op[2].unit == X86_OP_INT);
2475        if (!decode.op[2].has_ea) {
2476            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
2477        }
2478        break;
2479
2480    case X86_SPECIAL_AVXExtMov:
2481        if (!decode.op[2].has_ea) {
2482            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
2483        } else if (s->vex_l) {
2484            decode.op[2].ot++;
2485        }
2486        break;
2487
2488    case X86_SPECIAL_SExtT0:
2489    case X86_SPECIAL_ZExtT0:
2490        /* Handled in gen_load.  */
2491        assert(decode.op[1].unit == X86_OP_INT);
2492        break;
2493
2494    case X86_SPECIAL_NoSeg:
2495        decode.mem.def_seg = -1;
2496        s->override = -1;
2497        break;
2498
2499    default:
2500        break;
2501    }
2502
2503    if (s->prefix & PREFIX_LOCK) {
2504        if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
2505            goto illegal_op;
2506        }
2507    }
2508
2509    if (!validate_vex(s, &decode)) {
2510        return;
2511    }
2512
2513    /*
2514     * Checks that result in #GP or VMEXIT come second.  Intercepts are
2515     * generally checked after non-memory exceptions (i.e. before all
2516     * exceptions if there is no memory operand).  Exceptions are
2517     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
2518     *
2519     * RSM and XSETBV will be handled in the gen_* functions
2520     * instead of using chk().
2521     */
2522    if (decode.e.check & X86_CHECK_cpl0) {
2523        if (CPL(s) != 0) {
2524            goto gp_fault;
2525        }
2526    }
2527    if (decode.e.intercept && unlikely(GUEST(s))) {
2528        gen_helper_svm_check_intercept(tcg_env,
2529                                       tcg_constant_i32(decode.e.intercept));
2530    }
2531    if (decode.e.check) {
2532        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
2533            if (IOPL(s) < 3) {
2534                goto gp_fault;
2535            }
2536        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
2537            if (IOPL(s) < CPL(s)) {
2538                goto gp_fault;
2539            }
2540        }
2541    }
2542
2543    if (decode.e.special == X86_SPECIAL_MMX &&
2544        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2545        gen_helper_enter_mmx(tcg_env);
2546    }
2547
2548    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
2549        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
2550    }
2551    if (s->prefix & PREFIX_LOCK) {
2552        gen_load(s, &decode, 2, s->T1);
2553        decode.e.gen(s, env, &decode);
2554    } else {
2555        if (decode.op[0].unit == X86_OP_MMX) {
2556            compute_mmx_offset(&decode.op[0]);
2557        } else if (decode.op[0].unit == X86_OP_SSE) {
2558            compute_xmm_offset(&decode.op[0]);
2559        }
2560        gen_load(s, &decode, 1, s->T0);
2561        gen_load(s, &decode, 2, s->T1);
2562        decode.e.gen(s, env, &decode);
2563        gen_writeback(s, &decode, 0, s->T0);
2564    }
2565
2566    /*
2567     * Write back flags after last memory access.  Some newer ALU instructions, as
2568     * well as SSE instructions, write flags in the gen_* function, but that can
2569     * cause incorrect tracking of CC_OP for instructions that write to both memory
2570     * and flags.
2571     */
2572    if (decode.cc_op != -1) {
2573        if (decode.cc_dst) {
2574            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
2575        }
2576        if (decode.cc_src) {
2577            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
2578        }
2579        if (decode.cc_src2) {
2580            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
2581        }
2582        if (decode.cc_op == CC_OP_DYNAMIC) {
2583            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
2584        }
2585        set_cc_op(s, decode.cc_op);
2586        cc_live = cc_op_live[decode.cc_op];
2587    } else {
2588        cc_live = 0;
2589    }
2590    if (decode.cc_op != CC_OP_DYNAMIC) {
2591        assert(!decode.cc_op_dynamic);
2592        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
2593        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
2594        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
2595    }
2596
2597    return;
2598 gp_fault:
2599    gen_exception_gpf(s);
2600    return;
2601 illegal_op:
2602    gen_illegal_opcode(s);
2603    return;
2604 unknown_op:
2605    gen_unknown_opcode(env, s);
2606}
2607