xref: /openbmc/qemu/target/i386/tcg/decode-new.c.inc (revision 4d7dd4ed)
1/*
2 * New-style decoder for i386 instructions
3 *
4 *  Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM.  As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
26 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
27 * though.
28 *
29 * Vector operands
30 * ---------------
31 *
32 * The main difference is that the V, U and W types are extended to
33 * cover MMX as well; if an instruction is like
34 *
35 *      por   Pq, Qq
36 *  66  por   Vx, Hx, Wx
37 *
38 * only the second row is included and the instruction is marked as a
39 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
40 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
41 * "x" to "q" if there is no prefix.
42 *
43 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
44 * if the difference is expressed via prefixes.  Individual instructions
45 * are separated by prefix in the generator functions.
46 *
47 * There are a couple cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
49 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
50 *
51 * VEX exception classes
52 * ---------------------
53 *
54 * Speaking about imprecisions in the manual, the decoder treats all
55 * exception-class 4 instructions as having an optional VEX prefix, and
56 * all exception-class 6 instructions as having a mandatory VEX prefix.
57 * This is true except for a dozen instructions; these are in exception
58 * class 4 but do not ignore the VEX.W bit (which does not even exist
59 * without a VEX prefix).  These instructions are mostly listed in Intel's
60 * table 2-16, but with a few exceptions.
61 *
62 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
63 * they list the VEX.W requirements in the exception classes as well (except
64 * when they don't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still uses 4 as the
66 * primary exception class... except when they don't.
67 *
68 * The summary is:
69 *                       Intel     AMD         VEX.W           note
70 * -------------------------------------------------------------------
71 * vpblendd              4         4J          0
72 * vpblendvb             4         4E-X        0               (*)
73 * vpbroadcastq          6         6D          0               (+)
74 * vpermd/vpermps        4         4H          0               (§)
75 * vpermq/vpermpd        4         4H-1        1               (§)
76 * vpermilpd/vpermilps   4         6E          0               (^)
77 * vpmaskmovd            6         4K          significant     (^)
78 * vpsllv                4         4K          significant
79 * vpsrav                4         4J          0
80 * vpsrlv                4         4K          significant
81 * vtestps/vtestpd       4         4G          0
82 *
83 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
84 *         explain why it is considered exception class 4.  However,
85 *         Intel says that VEX-only instructions should be in class 6...
86 *
87 *    (+)  Not found in Intel's table 2-16
88 *
89 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
90 *         however present in the description of the instruction
91 *
92 *    (^)  these are the two cases in which Intel and AMD disagree on the
93 *         primary exception class
94 */
95
/*
 * Initializer for an empty table slot.  The expansion already ends with a
 * comma, so no separator is written after it at the point of use.
 */
#define X86_OP_NONE { 0 },

/*
 * Initializer for a table entry that is resolved further by a decode_<op>()
 * function: .decode points to that function and .is_decode is set.  The
 * operand types and sizes are given as bare suffixes and pasted onto the
 * X86_TYPE_ and X86_SIZE_ prefixes.  Any remaining arguments are appended
 * verbatim as additional designated initializers.
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

/*
 * Two-operand group: operand 1 gets type X86_TYPE_2op with the size of
 * operand 0, and the second listed operand is stored in the op2 slot.
 */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Group with no operands described in the table (all types/sizes are None). */
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
114
/*
 * Initializer for a table entry that maps directly to a generator function
 * gen_<op>.  Operand types and sizes are pasted onto X86_TYPE_ and
 * X86_SIZE_ exactly as in X86_OP_GROUP3; trailing arguments are appended
 * verbatim as additional designated initializers.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

/*
 * Same as X86_OP_ENTRY3, plus a fourth operand that is always of type
 * X86_TYPE_I and size X86_SIZE_b.
 */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/*
 * Two-operand entry: operand 1 gets type X86_TYPE_2op with the size of
 * operand 0, and the second listed operand is stored in the op2 slot.
 */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Only operand 0 is described ("w" presumably stands for write-only). */
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
/* Only operand 2 is described ("r" presumably stands for read-only). */
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
/* Entry with no operands described in the table. */
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
139
/*
 * Attribute macros for the variadic tail of X86_OP_ENTRY* / X86_OP_GROUP*.
 * Each one expands to designated initializers followed by a comma, which is
 * why the tables write them next to each other without separators.
 */

/* CPUID feature associated with the entry (X86_FEAT_<feat>). */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
/*
 * Values for the .special field.  All of these initialize the same field,
 * so combining two of them in one entry leaves only the last one in effect.
 */
#define xchg .special = X86_SPECIAL_Locked,
#define mmx .special = X86_SPECIAL_MMX,
#define zext0 .special = X86_SPECIAL_ZExtOp0,
#define zext2 .special = X86_SPECIAL_ZExtOp2,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,

/*
 * VEX exception class of the entry.  The suffixed variants additionally set
 * .vex_special; NOTE(review): the second number in the _repN names is
 * presumably the class applied to the scalar (REP-prefixed) forms -- confirm
 * against the code that consumes X86_VEX_REPScalar.
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

/* Extra validity check (X86_CHECK_<a>) and SVM intercept (SVM_EXIT_<a>). */
#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

/*
 * Sets .vex_special as well, so it must not be combined with the
 * vexN_unal / vexN_repM variants above in the same entry.
 */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
167
/*
 * Bits for the .valid_prefix field.  P_00 (no prefix) is bit 0; the others
 * are obtained by shifting 1 by the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * Attribute macros listing the prefixes accepted by an entry; the name
 * spells out the accepted set (e.g. p_00_66 is "no prefix or 66").
 */
#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
183
184static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
185{
186    if (!s->has_modrm) {
187        s->modrm = x86_ldub_code(env, s);
188        s->has_modrm = true;
189    }
190    return s->modrm;
191}
192
193static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
194{
195    if (s->prefix & PREFIX_REPNZ) {
196        return &entries[3];
197    } else if (s->prefix & PREFIX_REPZ) {
198        return &entries[2];
199    } else if (s->prefix & PREFIX_DATA) {
200        return &entries[1];
201    } else {
202        return &entries[0];
203    }
204}
205
206static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
207{
208    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
209    static const X86OpEntry group15_reg[8] = {
210    };
211
212    static const X86OpEntry group15_mem[8] = {
213        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128)),
214        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128)),
215    };
216
217    uint8_t modrm = get_modrm(s, env);
218    if ((modrm >> 6) == 3) {
219        *entry = group15_reg[(modrm >> 3) & 7];
220    } else {
221        *entry = group15_mem[(modrm >> 3) & 7];
222    }
223}
224
225static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
226{
227    static const X86GenFunc group17_gen[8] = {
228        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
229    };
230    int op = (get_modrm(s, env) >> 3) & 7;
231    entry->gen = group17_gen[op];
232}
233
234static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
235{
236    static const X86OpEntry opcodes_group12[8] = {
237        {},
238        {},
239        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
240        {},
241        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
242        {},
243        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
244        {},
245    };
246
247    int op = (get_modrm(s, env) >> 3) & 7;
248    *entry = opcodes_group12[op];
249}
250
251static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
252{
253    static const X86OpEntry opcodes_group13[8] = {
254        {},
255        {},
256        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
257        {},
258        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
259        {},
260        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
261        {},
262    };
263
264    int op = (get_modrm(s, env) >> 3) & 7;
265    *entry = opcodes_group13[op];
266}
267
268static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
269{
270    static const X86OpEntry opcodes_group14[8] = {
271        /* grp14 */
272        {},
273        {},
274        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
275        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
276        {},
277        {},
278        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
279        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
280    };
281
282    int op = (get_modrm(s, env) >> 3) & 7;
283    *entry = opcodes_group14[op];
284}
285
286static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
287{
288    static const X86OpEntry opcodes_0F6F[4] = {
289        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
290        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
291        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
292        {},
293    };
294    *entry = *decode_by_prefix(s, opcodes_0F6F);
295}
296
297static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
298{
299    static const X86OpEntry pshufw[4] = {
300        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
301        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
302        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
303        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
304    };
305
306    *entry = *decode_by_prefix(s, pshufw);
307}
308
309static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
310{
311    if (!(s->prefix & PREFIX_VEX)) {
312        entry->gen = gen_EMMS;
313    } else if (!s->vex_l) {
314        entry->gen = gen_VZEROUPPER;
315        entry->vex_class = 8;
316    } else {
317        entry->gen = gen_VZEROALL;
318        entry->vex_class = 8;
319    }
320}
321
322static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
323{
324    static const X86OpEntry opcodes_0F78[4] = {
325        {},
326        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
327        {},
328        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
329    };
330    *entry = *decode_by_prefix(s, opcodes_0F78);
331}
332
333static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
334{
335    if (s->prefix & PREFIX_REPNZ) {
336        entry->gen = gen_INSERTQ_r; /* AMD extension */
337    } else if (s->prefix & PREFIX_DATA) {
338        entry->gen = gen_EXTRQ_r; /* AMD extension */
339    } else {
340        entry->gen = NULL;
341    };
342}
343
344static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
345{
346    static const X86OpEntry opcodes_0F7E[4] = {
347        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
348        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
349        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
350        {},
351    };
352    *entry = *decode_by_prefix(s, opcodes_0F7E);
353}
354
355static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
356{
357    static const X86OpEntry opcodes_0F7F[4] = {
358        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
359        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
360        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
361        {},
362    };
363    *entry = *decode_by_prefix(s, opcodes_0F7F);
364}
365
366static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
367{
368    static const X86OpEntry movq[4] = {
369        {},
370        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
371        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
372        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
373    };
374
375    *entry = *decode_by_prefix(s, movq);
376}
377
/*
 * Entries for the three-byte opcodes 0F 38 xx with xx below 0xF0, indexed
 * directly by the third opcode byte (see decode_0F38).  Slots without a
 * designated initializer stay zero-initialized.
 *
 * The entries are not in numeric order: for every high nibble the low
 * halves (x0-x7) come first, followed further down by the high halves
 * (x8-xF).
 */
static const X86OpEntry opcodes_0F38_00toEF[240] = {
    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),

    /*
     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
     * as 128-bit only in 2-17.
     */
    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),

    /* Same as PMOVSX.  */
    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),

    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),

    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */

    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    /* Listed incorrectly as type 4 */
    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    /* Incorrectly listed as Mx,Hx,Vx in the manual */
    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),

    /* VPBROADCASTQ not listed as W0 in table 2-16 */
    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),

    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),

    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),

    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),

    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
};
527
528/* five rows for no prefix, 66, F3, F2, 66+F2  */
529static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
530    [0] = {
531        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
532        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
533        {},
534        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
535        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
536    },
537    [1] = {
538        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
539        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
540        {},
541        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
542        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
543    },
544    [2] = {
545        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
546        {},
547        {},
548        {},
549        {},
550    },
551    [3] = {
552        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
553        {},
554        {},
555        {},
556        {},
557    },
558    [5] = {
559        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
560        {},
561        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
562        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
563        {},
564    },
565    [6] = {
566        {},
567        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
568        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
569        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
570        {},
571    },
572    [7] = {
573        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
574        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
575        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
576        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
577        {},
578    },
579};
580
581static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
582{
583    *b = x86_ldub_code(env, s);
584    if (*b < 0xf0) {
585        *entry = opcodes_0F38_00toEF[*b];
586    } else {
587        int row = 0;
588        if (s->prefix & PREFIX_REPZ) {
589            /* The REPZ (F3) prefix has priority over 66 */
590            row = 2;
591        } else {
592            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
593            row += s->prefix & PREFIX_DATA ? 1 : 0;
594        }
595        *entry = opcodes_0F38_F0toFF[*b & 15][row];
596    }
597}
598
599static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
600{
601    static const X86OpEntry
602        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
603        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
604
605    int modrm = get_modrm(s, env);
606    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
607}
608
/*
 * Opcode table indexed by the byte that decode_0F3A() reads (that function
 * is presumably reached through the opcodes_0F[0x3a] group entry).  Each row
 * names a generator, up to three operand type/size pairs and a run of flag
 * macros (vexN, chk(), cpuid(), p_66, ...) that are defined earlier in this
 * file, outside this excerpt.  Rows are grouped by instruction family, not
 * sorted by opcode; every row carries a designated index, so the order has
 * no effect, and indices without an initializer stay zero-filled.
 */
609static const X86OpEntry opcodes_0F3A[256] = {
610    /*
611     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
612     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
613     * only.
614     */
615    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
616    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
617    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
618    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
619    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
620    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
621
622    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
623    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
624    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
625    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
626    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),
627
628    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) zext2 p_66),
629    [0x21] = X86_OP_GROUP0(VINSERTPS),
630    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),
631
632    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
633    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
634    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
635    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
636    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
637
638    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
639    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
640    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
641    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
642
643    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
644    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
645    /*
646     * Not listed as four operand in the manual.  Also writes and reads 128-bits
647     * from the first two operands due to the V operand picking higher entries of
648     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
649     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
650     * value of vex_special, because the table lists the operand types of VSQRTPx.
651     */
652    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
653    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
654    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
655    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
656    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
657    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
658
659    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
660    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),
661
662    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
663    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),
664
665    /* Listed incorrectly as type 4 */
666    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
667    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
668    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),
669
670    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),
671
672    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),
673
674    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
675};
676
677static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
678{
679    *b = x86_ldub_code(env, s);
680    *entry = opcodes_0F3A[*b];
681}
682
683/*
684 * There are some mistakes in the operands in the manual, and the load/store/register
685 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
686 * efficiency of implementation rather than copying what the manual says.
687 *
688 * In particular:
689 *
690 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
691 * but this is not mentioned in the tables.
692 *
693 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
694 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
695 * quadword of the V operand.
696 */
697static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
698{
699    static const X86OpEntry opcodes_0F10_reg[4] = {
700        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
701        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
702        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
703        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
704    };
705
706    static const X86OpEntry opcodes_0F10_mem[4] = {
707        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
708        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
709        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
710        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
711    };
712
713    if ((get_modrm(s, env) >> 6) == 3) {
714        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
715    } else {
716        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
717    }
718}
719
720static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
721{
722    static const X86OpEntry opcodes_0F11_reg[4] = {
723        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
724        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
725        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
726        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
727    };
728
729    static const X86OpEntry opcodes_0F11_mem[4] = {
730        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
731        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
732        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
733        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
734    };
735
736    if ((get_modrm(s, env) >> 6) == 3) {
737        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
738    } else {
739        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
740    }
741}
742
743static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
744{
745    static const X86OpEntry opcodes_0F12_mem[4] = {
746        /*
747         * Use dq for operand for compatibility with gen_MOVSD and
748         * to allow VEX128 only.
749         */
750        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
751        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
752        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
753        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
754    };
755    static const X86OpEntry opcodes_0F12_reg[4] = {
756        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
757        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
758        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
759        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
760    };
761
762    if ((get_modrm(s, env) >> 6) == 3) {
763        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
764    } else {
765        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
766        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
767            entry->s2 = X86_SIZE_qq;
768        }
769    }
770}
771
772static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
773{
774    static const X86OpEntry opcodes_0F16_mem[4] = {
775        /*
776         * Operand 1 technically only reads the low 64 bits, but uses dq so that
777         * it is easier to check for op0 == op1 in an endianness-neutral manner.
778         */
779        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
780        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
781        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
782        {},
783    };
784    static const X86OpEntry opcodes_0F16_reg[4] = {
785        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
786        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
787        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
788        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
789        {},
790    };
791
792    if ((get_modrm(s, env) >> 6) == 3) {
793        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
794    } else {
795        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
796    }
797}
798
799static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
800{
801    static const X86OpEntry opcodes_0F2A[4] = {
802        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
803        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
804        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
805        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
806    };
807    *entry = *decode_by_prefix(s, opcodes_0F2A);
808}
809
810static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
811{
812    static const X86OpEntry opcodes_0F2B[4] = {
813        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
814        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
815        /* AMD extensions */
816        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
817        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
818    };
819
820    *entry = *decode_by_prefix(s, opcodes_0F2B);
821}
822
823static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
824{
825    static const X86OpEntry opcodes_0F2C[4] = {
826        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
827        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
828        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
829        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
830        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
831    };
832    *entry = *decode_by_prefix(s, opcodes_0F2C);
833}
834
835static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
836{
837    static const X86OpEntry opcodes_0F2D[4] = {
838        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
839        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
840        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
841        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
842        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
843    };
844    *entry = *decode_by_prefix(s, opcodes_0F2D);
845}
846
847static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
848{
849    /*
850     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
851     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
852     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
853     */
854    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
855    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
856}
857
858static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
859{
860    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
861        entry->op1 = X86_TYPE_None;
862        entry->s1 = X86_SIZE_None;
863    }
864    switch (*b) {
865    case 0x51: entry->gen = gen_VSQRT; break;
866    case 0x52: entry->gen = gen_VRSQRT; break;
867    case 0x53: entry->gen = gen_VRCP; break;
868    }
869}
870
871static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
872{
873    static const X86OpEntry opcodes_0F5A[4] = {
874        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
875        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
876        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
877        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
878    };
879    *entry = *decode_by_prefix(s, opcodes_0F5A);
880}
881
882static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
883{
884    static const X86OpEntry opcodes_0F5B[4] = {
885        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
886        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
887        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
888        {},
889    };
890    *entry = *decode_by_prefix(s, opcodes_0F5B);
891}
892
893static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
894{
895    static const X86OpEntry opcodes_0FE6[4] = {
896        {},
897        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
898        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
899        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
900    };
901    *entry = *decode_by_prefix(s, opcodes_0FE6);
902}
903
/*
 * Opcode table indexed by the byte that do_decode_0F() receives in *b
 * (decode_0F() loads that byte from the instruction stream first).
 * X86_OP_ENTRYn rows describe an instruction directly; X86_OP_GROUPn rows
 * name a decode_* callback instead -- presumably the one run by the
 * is_decode loop in decode_insn(); the macros themselves are defined outside
 * this excerpt.  Rows are grouped by opcode range rather than sorted; each
 * one carries a designated index, so the order has no effect, and indices
 * without an initializer stay zero-filled.
 */
904static const X86OpEntry opcodes_0F[256] = {
905    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
906    /*
907     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
908     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
909     * function.
910     */
911    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
912
913    [0x10] = X86_OP_GROUP0(0F10),
914    [0x11] = X86_OP_GROUP0(0F11),
915    [0x12] = X86_OP_GROUP0(0F12),
916    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
917    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
918    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
919    [0x16] = X86_OP_GROUP0(0F16),
920    /* Incorrectly listed as Mq,Vq in the manual */
921    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
922
923    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
924    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
925    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
926    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
927    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
928    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
929    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
930    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
931
932    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
933    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
934    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
935    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
936    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
937    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
938    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
939    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
940
941    [0x70] = X86_OP_GROUP0(0F70),
942    [0x71] = X86_OP_GROUP0(group12),
943    [0x72] = X86_OP_GROUP0(group13),
944    [0x73] = X86_OP_GROUP0(group14),
945    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
946    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
947    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
948    [0x77] = X86_OP_GROUP0(0F77),
949
950    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
951    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
952    [0x2A] = X86_OP_GROUP0(0F2A),
953    [0x2B] = X86_OP_GROUP0(0F2B),
954    [0x2C] = X86_OP_GROUP0(0F2C),
955    [0x2D] = X86_OP_GROUP0(0F2D),
956    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
957    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */
958
959    [0x38] = X86_OP_GROUP0(0F38),
960    [0x3a] = X86_OP_GROUP0(0F3A),
961
962    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
963    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
964    [0x5a] = X86_OP_GROUP0(0F5A),
965    [0x5b] = X86_OP_GROUP0(0F5B),
966    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
967    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
968    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
969    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
970
971    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
972    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
973    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
974    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
975    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
976    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
977    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
978    [0x6f] = X86_OP_GROUP0(0F6F),
979
980    [0x78] = X86_OP_GROUP0(0F78),
981    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
982    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
983    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
984    [0x7e] = X86_OP_GROUP0(0F7E),
985    [0x7f] = X86_OP_GROUP0(0F7F),
986
987    [0xae] = X86_OP_GROUP0(group15),
988
989    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
990    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
991    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
992    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
993
994    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
995    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
996    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
997    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
998    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
999    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1000    [0xd6] = X86_OP_GROUP0(0FD6),
1001    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1002
1003    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1004    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1005    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1006    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1007    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1008    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1009    [0xe6] = X86_OP_GROUP0(0FE6),
1010    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1011
1012    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1013    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1014    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1015    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1016    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1017    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1018    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1019    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1020
1021    /* Incorrectly missing from 2-17 */
1022    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1023    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1024    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1025    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1026    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1027    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1028    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1029    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1030
1031    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1032    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1033    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1034    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1035    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1036    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1037    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1038    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1039
1040    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1041    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1042    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1043    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1044    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1045    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1046    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1047    /* 0xff = UD0 */
1048};
1049
1050static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1051{
1052    *entry = opcodes_0F[*b];
1053}
1054
1055static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1056{
1057    *b = x86_ldub_code(env, s);
1058    do_decode_0F(s, env, entry, b);
1059}
1060
/*
 * Table for the first opcode byte, consulted by decode_root().  Only the
 * 0x0F escape is populated here; every other index is zero-filled.
 */
1061static const X86OpEntry opcodes_root[256] = {
1062    [0x0F] = X86_OP_GROUP0(0F),
1063};
1064
/*
 * Retire the short flag macros that the opcode tables above use (they are
 * defined earlier in this file, outside this excerpt) so that these very
 * common identifiers do not leak into the code that follows.
 */
1065#undef mmx
1066#undef vex1
1067#undef vex2
1068#undef vex3
1069#undef vex4
1070#undef vex4_unal
1071#undef vex5
1072#undef vex6
1073#undef vex7
1074#undef vex8
1075#undef vex11
1076#undef vex12
1077#undef vex13
1078
1079/*
1080 * Decode the fixed part of the opcode and place the last
1081 * in b.
1082 */
1083static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1084{
1085    *entry = opcodes_root[*b];
1086}
1087
1088
1089static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1090                        X86DecodedOp *op, X86OpType type)
1091{
1092    int modrm = get_modrm(s, env);
1093    if ((modrm >> 6) == 3) {
1094        if (s->prefix & PREFIX_LOCK) {
1095            decode->e.gen = gen_illegal;
1096            return 0xff;
1097        }
1098        op->n = (modrm & 7);
1099        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1100            op->n |= REX_B(s);
1101        }
1102    } else {
1103        op->has_ea = true;
1104        op->n = -1;
1105        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1106    }
1107    return modrm;
1108}
1109
1110static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1111{
1112    switch (size) {
1113    case X86_SIZE_b:  /* byte */
1114        *ot = MO_8;
1115        return true;
1116
1117    case X86_SIZE_d:  /* 32-bit */
1118    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1119        *ot = MO_32;
1120        return true;
1121
1122    case X86_SIZE_p:  /* Far pointer, return offset size */
1123    case X86_SIZE_s:  /* Descriptor, return offset size */
1124    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1125        *ot = s->dflag;
1126        return true;
1127
1128    case X86_SIZE_pi: /* MMX */
1129    case X86_SIZE_q:  /* 64-bit */
1130    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1131        *ot = MO_64;
1132        return true;
1133
1134    case X86_SIZE_w:  /* 16-bit */
1135        *ot = MO_16;
1136        return true;
1137
1138    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1139        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1140        return true;
1141
1142    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1143        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1144        return true;
1145
1146    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1147        if (e->special == X86_SPECIAL_MMX &&
1148            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1149            *ot = MO_64;
1150            return true;
1151        }
1152        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1153            return false;
1154        }
1155        *ot = MO_128;
1156        return true;
1157
1158    case X86_SIZE_qq: /* AVX 256-bit */
1159        if (!s->vex_l) {
1160            return false;
1161        }
1162        *ot = MO_256;
1163        return true;
1164
1165    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1166        if (e->special == X86_SPECIAL_MMX &&
1167            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1168            *ot = MO_64;
1169            return true;
1170        }
1171        /* fall through */
1172    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1173    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1174        *ot = s->vex_l ? MO_256 : MO_128;
1175        return true;
1176
1177    case X86_SIZE_xh: /* SSE/AVX packed half register */
1178        *ot = s->vex_l ? MO_128 : MO_64;
1179        return true;
1180
1181    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1182        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1183        return true;
1184
1185    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1186        *ot = CODE64(s) ? MO_64 : s->dflag;
1187        return true;
1188
1189    default:
1190        *ot = -1;
1191        return true;
1192    }
1193}
1194
/*
 * Decode one operand of the instruction described by decode->e.
 *
 * @op:   operand slot to fill in (unit, register number n, has_ea, ...)
 * @type: operand type code taken from the table entry
 * @b:    opcode byte; only X86_TYPE_LoBits looks at it
 *
 * Several cases share their tail through the get_reg, get_modrm_reg and
 * get_modrm labels, which sit inside other case bodies; statement order in
 * this switch is therefore significant.
 *
 * Returns false when the modrm byte does not fit the operand type (a
 * register-only type with mod != 3, or a memory-only type with mod == 3),
 * true otherwise.  Types not listed in the switch fall out of it and also
 * return true.
 */
1195static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1196                      X86DecodedOp *op, X86OpType type, int b)
1197{
1198    int modrm;
1199
1200    switch (type) {
1201    case X86_TYPE_None:  /* Implicit or absent */
1202    case X86_TYPE_A:  /* Implicit */
1203    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1204        break;
1205
1206    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1207        op->unit = X86_OP_INT;
1208        op->n = s->vex_v;
1209        break;
1210
1211    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1212        op->unit = X86_OP_CR;
1213        goto get_reg;
1214
1215    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1216        op->unit = X86_OP_DR;
1217        goto get_reg;
1218
1219    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1220        op->unit = X86_OP_INT;
1221        goto get_reg;
1222
1223    case X86_TYPE_S:  /* reg selects a segment register */
1224        op->unit = X86_OP_SEG;
1225        goto get_reg;
1226
1227    case X86_TYPE_P:
1228        op->unit = X86_OP_MMX;
1229        goto get_reg;
1230
1231    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
        /* MMX-capable entries without 66/F3/F2 use the MMX register file. */
1232        if (decode->e.special == X86_SPECIAL_MMX &&
1233            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1234            op->unit = X86_OP_MMX;
1235        } else {
1236            op->unit = X86_OP_SSE;
1237        }
    /* Shared tail: register number comes from modrm.reg, extended by REX_R(). */
1238    get_reg:
1239        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1240        break;
1241
1242    case X86_TYPE_E:  /* ALU modrm operand */
1243        op->unit = X86_OP_INT;
1244        goto get_modrm;
1245
1246    case X86_TYPE_Q:  /* MMX modrm operand */
1247        op->unit = X86_OP_MMX;
1248        goto get_modrm;
1249
1250    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1251        if (decode->e.special == X86_SPECIAL_MMX &&
1252            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1253            op->unit = X86_OP_MMX;
1254        } else {
1255            op->unit = X86_OP_SSE;
1256        }
1257        goto get_modrm;
1258
1259    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1260        op->unit = X86_OP_MMX;
1261        goto get_modrm_reg;
1262
1263    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1264        if (decode->e.special == X86_SPECIAL_MMX &&
1265            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1266            op->unit = X86_OP_MMX;
1267        } else {
1268            op->unit = X86_OP_SSE;
1269        }
1270        goto get_modrm_reg;
1271
1272    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1273        op->unit = X86_OP_INT;
    /* Shared tail for register-only r/m operands: a memory form is invalid. */
1274    get_modrm_reg:
1275        modrm = get_modrm(s, env);
1276        if ((modrm >> 6) != 3) {
1277            return false;
1278        }
1279        goto get_modrm;
1280
1281    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1282        op->unit = X86_OP_SSE;
1283        /* fall through */
1284    case X86_TYPE_M:  /* modrm byte selects a memory operand */
        /* Memory-only operands: a register form is invalid. */
1285        modrm = get_modrm(s, env);
1286        if ((modrm >> 6) == 3) {
1287            return false;
1288        }
    /* Shared tail for every operand taken from the mod/rm fields. */
1289    get_modrm:
1290        decode_modrm(s, env, decode, op, type);
1291        break;
1292
1293    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1294        op->unit = X86_OP_INT;
1295        op->has_ea = true;
1296        op->n = -1;
1297        decode->mem = (AddressParts) {
1298            .def_seg = R_DS,
1299            .base = -1,
1300            .index = -1,
1301            .disp = insn_get_addr(env, s, s->aflag)
1302        };
1303        break;
1304
1305    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1306        if ((s->prefix & PREFIX_VEX)) {
1307            op->unit = X86_OP_SSE;
1308            op->n = s->vex_v;
1309            break;
1310        }
        /*
         * Without VEX there is no vvvv field: decode this slot again using
         * the type of another operand of the entry (operand 1's type when
         * this is slot 0, operand 0's type otherwise).
         */
1311        if (op == &decode->op[0]) {
1312            /* shifts place the destination in VEX.vvvv, use modrm */
1313            return decode_op(s, env, decode, op, decode->e.op1, b);
1314        } else {
1315            return decode_op(s, env, decode, op, decode->e.op0, b);
1316        }
1317
1318    case X86_TYPE_I:  /* Immediate */
1319        op->unit = X86_OP_IMM;
        /* Reads op->ot, so the operand size must already have been set. */
1320        decode->immediate = insn_get_signed(env, s, op->ot);
1321        break;
1322
1323    case X86_TYPE_J:  /* Relative offset for a jump */
1324        op->unit = X86_OP_IMM;
1325        decode->immediate = insn_get_signed(env, s, op->ot);
        /* Turn the displacement into a target and wrap it for the mode. */
1326        decode->immediate += s->pc - s->cs_base;
1327        if (s->dflag == MO_16) {
1328            decode->immediate &= 0xffff;
1329        } else if (!CODE64(s)) {
1330            decode->immediate &= 0xffffffffu;
1331        }
1332        break;
1333
1334    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1335        op->n = insn_get(env, s, op->ot) >> 4;
1336        break;
1337
1338    case X86_TYPE_X:  /* string source */
1339        op->n = -1;
1340        decode->mem = (AddressParts) {
1341            .def_seg = R_DS,
1342            .base = R_ESI,
1343            .index = -1,
1344        };
1345        break;
1346
1347    case X86_TYPE_Y:  /* string destination */
1348        op->n = -1;
1349        decode->mem = (AddressParts) {
1350            .def_seg = R_ES,
1351            .base = R_EDI,
1352            .index = -1,
1353        };
1354        break;
1355
1356    case X86_TYPE_2op:
        /* This operand is a copy of operand 0 as decoded so far. */
1357        *op = decode->op[0];
1358        break;
1359
1360    case X86_TYPE_LoBits:
        /* GPR number from the low three bits of the opcode byte plus REX_B(). */
1361        op->n = (b & 7) | REX_B(s);
1362        op->unit = X86_OP_INT;
1363        break;
1364
1365    case X86_TYPE_0 ... X86_TYPE_7:
        /* Fixed GPR: the register number is the offset within the enum range. */
1366        op->n = type - X86_TYPE_0;
1367        op->unit = X86_OP_INT;
1368        break;
1369
1370    case X86_TYPE_ES ... X86_TYPE_GS:
        /* Fixed segment register, numbered the same way. */
1371        op->n = type - X86_TYPE_ES;
1372        op->unit = X86_OP_SEG;
1373        break;
1374    }
1375
1376    return true;
1377}
1378
1379static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1380{
1381    uint16_t sse_prefixes;
1382
1383    if (!e->valid_prefix) {
1384        return true;
1385    }
1386    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1387        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1388        s->prefix &= ~PREFIX_DATA;
1389    }
1390
1391    /* Now, either zero or one bit is set in sse_prefixes.  */
1392    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1393    return e->valid_prefix & (1 << sse_prefixes);
1394}
1395
1396static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1397                        X86DecodedInsn *decode)
1398{
1399    X86OpEntry *e = &decode->e;
1400
1401    decode_func(s, env, e, &decode->b);
1402    while (e->is_decode) {
1403        e->is_decode = false;
1404        e->decode(s, env, e, &decode->b);
1405    }
1406
1407    if (!validate_sse_prefix(s, e)) {
1408        return false;
1409    }
1410
1411    /* First compute size of operands in order to initialize s->rip_offset.  */
1412    if (e->op0 != X86_TYPE_None) {
1413        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1414            return false;
1415        }
1416        if (e->op0 == X86_TYPE_I) {
1417            s->rip_offset += 1 << decode->op[0].ot;
1418        }
1419    }
1420    if (e->op1 != X86_TYPE_None) {
1421        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1422            return false;
1423        }
1424        if (e->op1 == X86_TYPE_I) {
1425            s->rip_offset += 1 << decode->op[1].ot;
1426        }
1427    }
1428    if (e->op2 != X86_TYPE_None) {
1429        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1430            return false;
1431        }
1432        if (e->op2 == X86_TYPE_I) {
1433            s->rip_offset += 1 << decode->op[2].ot;
1434        }
1435    }
1436    if (e->op3 != X86_TYPE_None) {
1437        /*
1438         * A couple instructions actually use the extra immediate byte for an Lx
1439         * register operand; those are handled in the gen_* functions as one off.
1440         */
1441        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1442        s->rip_offset += 1;
1443    }
1444
1445    if (e->op0 != X86_TYPE_None &&
1446        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1447        return false;
1448    }
1449
1450    if (e->op1 != X86_TYPE_None &&
1451        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1452        return false;
1453    }
1454
1455    if (e->op2 != X86_TYPE_None &&
1456        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
1457        return false;
1458    }
1459
1460    if (e->op3 != X86_TYPE_None) {
1461        decode->immediate = insn_get_signed(env, s, MO_8);
1462    }
1463
1464    return true;
1465}
1466
1467static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
1468{
1469    switch (cpuid) {
1470    case X86_FEAT_None:
1471        return true;
1472    case X86_FEAT_F16C:
1473        return (s->cpuid_ext_features & CPUID_EXT_F16C);
1474    case X86_FEAT_FMA:
1475        return (s->cpuid_ext_features & CPUID_EXT_FMA);
1476    case X86_FEAT_MOVBE:
1477        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
1478    case X86_FEAT_PCLMULQDQ:
1479        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
1480    case X86_FEAT_SSE:
1481        return (s->cpuid_ext_features & CPUID_SSE);
1482    case X86_FEAT_SSE2:
1483        return (s->cpuid_ext_features & CPUID_SSE2);
1484    case X86_FEAT_SSE3:
1485        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
1486    case X86_FEAT_SSSE3:
1487        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
1488    case X86_FEAT_SSE41:
1489        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
1490    case X86_FEAT_SSE42:
1491        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
1492    case X86_FEAT_AES:
1493        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
1494            return false;
1495        } else if (!(s->prefix & PREFIX_VEX)) {
1496            return true;
1497        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
1498            return false;
1499        } else {
1500            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
1501        }
1502
1503    case X86_FEAT_AVX:
1504        return (s->cpuid_ext_features & CPUID_EXT_AVX);
1505
1506    case X86_FEAT_3DNOW:
1507        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
1508    case X86_FEAT_SSE4A:
1509        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
1510
1511    case X86_FEAT_ADX:
1512        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
1513    case X86_FEAT_BMI1:
1514        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
1515    case X86_FEAT_BMI2:
1516        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
1517    case X86_FEAT_AVX2:
1518        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
1519    case X86_FEAT_SHA_NI:
1520        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
1521    }
1522    g_assert_not_reached();
1523}
1524
1525static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
1526{
1527    X86OpEntry *e = &decode->e;
1528
1529    switch (e->vex_special) {
1530    case X86_VEX_REPScalar:
1531        /*
1532         * Instructions which differ between 00/66 and F2/F3 in the
1533         * exception classification and the size of the memory operand.
1534         */
1535        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
1536        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1537            e->vex_class = e->vex_class < 4 ? 3 : 5;
1538            if (s->vex_l) {
1539                goto illegal;
1540            }
1541            assert(decode->e.s2 == X86_SIZE_x);
1542            if (decode->op[2].has_ea) {
1543                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
1544            }
1545        }
1546        break;
1547
1548    case X86_VEX_SSEUnaligned:
1549        /* handled in sse_needs_alignment.  */
1550        break;
1551
1552    case X86_VEX_AVX2_256:
1553        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
1554            goto illegal;
1555        }
1556    }
1557
1558    switch (e->vex_class) {
1559    case 0:
1560        if (s->prefix & PREFIX_VEX) {
1561            goto illegal;
1562        }
1563        return true;
1564    case 1:
1565    case 2:
1566    case 3:
1567    case 4:
1568    case 5:
1569    case 7:
1570        if (s->prefix & PREFIX_VEX) {
1571            if (!(s->flags & HF_AVX_EN_MASK)) {
1572                goto illegal;
1573            }
1574        } else if (e->special != X86_SPECIAL_MMX ||
1575                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
1576            if (!(s->flags & HF_OSFXSR_MASK)) {
1577                goto illegal;
1578            }
1579        }
1580        break;
1581    case 12:
1582        /* Must have a VSIB byte and no address prefix.  */
1583        assert(s->has_modrm);
1584        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
1585            goto illegal;
1586        }
1587
1588        /* Check no overlap between registers.  */
1589        if (!decode->op[0].has_ea &&
1590            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
1591            goto illegal;
1592        }
1593        assert(!decode->op[1].has_ea);
1594        if (decode->op[1].n == decode->mem.index) {
1595            goto illegal;
1596        }
1597        if (!decode->op[2].has_ea &&
1598            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
1599            goto illegal;
1600        }
1601        /* fall through */
1602    case 6:
1603    case 11:
1604        if (!(s->prefix & PREFIX_VEX)) {
1605            goto illegal;
1606        }
1607        if (!(s->flags & HF_AVX_EN_MASK)) {
1608            goto illegal;
1609        }
1610        break;
1611    case 8:
1612        /* Non-VEX case handled in decode_0F77.  */
1613        assert(s->prefix & PREFIX_VEX);
1614        if (!(s->flags & HF_AVX_EN_MASK)) {
1615            goto illegal;
1616        }
1617        break;
1618    case 13:
1619        if (!(s->prefix & PREFIX_VEX)) {
1620            goto illegal;
1621        }
1622        if (s->vex_l) {
1623            goto illegal;
1624        }
1625        /* All integer instructions use VEX.vvvv, so exit.  */
1626        return true;
1627    }
1628
1629    if (s->vex_v != 0 &&
1630        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
1631        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
1632        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
1633        goto illegal;
1634    }
1635
1636    if (s->flags & HF_TS_MASK) {
1637        goto nm_exception;
1638    }
1639    if (s->flags & HF_EM_MASK) {
1640        goto illegal;
1641    }
1642
1643    if (e->check) {
1644        if (e->check & X86_CHECK_VEX128) {
1645            if (s->vex_l) {
1646                goto illegal;
1647            }
1648        }
1649        if (e->check & X86_CHECK_W0) {
1650            if (s->vex_w) {
1651                goto illegal;
1652            }
1653        }
1654        if (e->check & X86_CHECK_W1) {
1655            if (!s->vex_w) {
1656                goto illegal;
1657            }
1658        }
1659    }
1660    return true;
1661
1662nm_exception:
1663    gen_NM_exception(s);
1664    return false;
1665illegal:
1666    gen_illegal_opcode(s);
1667    return false;
1668}
1669
/*
 * Convert one instruction. s->base.is_jmp is set if the translation must
 * be stopped.
 *
 * @b is used as the first byte of the instruction without fetching it
 * here; further bytes are read with x86_ldub_code().  A value of 0x100 or
 * more selects the 0F opcode table directly, with b - 0x100 as the opcode.
 *
 * The function proceeds in stages:
 *  1. collect legacy, REX and VEX prefixes into the DisasContext;
 *  2. derive the data and address sizes (s->dflag, s->aflag);
 *  3. look up and decode the instruction with decode_insn();
 *  4. run the checks that raise #UD, then validate_vex(), then the checks
 *     that raise #GP or trigger an SVM intercept;
 *  5. compute the effective address, load the source operands, call the
 *     instruction's gen function and write back operand 0.
 *
 * Every failure path generates the exception itself; nothing is returned
 * to the caller.
 */
static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
{
    CPUX86State *env = cpu_env(cpu);
    bool first = true;
    X86DecodedInsn decode;
    X86DecodeFunc decode_func = decode_root;

    s->has_modrm = false;

 next_byte:
    /* The first byte is the one passed in; only later ones are fetched.  */
    if (first) {
        first = false;
    } else {
        b = x86_ldub_code(env, s);
    }
    /* Collect prefixes.  */
    switch (b) {
    case 0xf3:
        /* REPZ and REPNZ are mutually exclusive: the last one wins.  */
        s->prefix |= PREFIX_REPZ;
        s->prefix &= ~PREFIX_REPNZ;
        goto next_byte;
    case 0xf2:
        s->prefix |= PREFIX_REPNZ;
        s->prefix &= ~PREFIX_REPZ;
        goto next_byte;
    case 0xf0:
        s->prefix |= PREFIX_LOCK;
        goto next_byte;
    /* Segment overrides: the last one seen replaces any earlier one.  */
    case 0x2e:
        s->override = R_CS;
        goto next_byte;
    case 0x36:
        s->override = R_SS;
        goto next_byte;
    case 0x3e:
        s->override = R_DS;
        goto next_byte;
    case 0x26:
        s->override = R_ES;
        goto next_byte;
    case 0x64:
        s->override = R_FS;
        goto next_byte;
    case 0x65:
        s->override = R_GS;
        goto next_byte;
    case 0x66:
        s->prefix |= PREFIX_DATA;
        goto next_byte;
    case 0x67:
        s->prefix |= PREFIX_ADR;
        goto next_byte;
#ifdef TARGET_X86_64
    case 0x40 ... 0x4f:
        if (CODE64(s)) {
            /* REX prefix */
            s->prefix |= PREFIX_REX;
            /* REX.W is kept in vex_w; R/X/B are pre-shifted to bit 3.  */
            s->vex_w = (b >> 3) & 1;
            s->rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            s->rex_b = (b & 0x1) << 3;
            goto next_byte;
        }
        /* Outside 64-bit code these bytes are ordinary opcodes.  */
        break;
#endif
    case 0xc5: /* 2-byte VEX */
    case 0xc4: /* 3-byte VEX */
        /*
         * VEX prefixes cannot be used except in 32-bit mode.
         * Otherwise the instruction is LES or LDS.
         */
        if (CODE32(s) && !VM86(s)) {
            /* Maps the VEX "pp" field to the equivalent legacy prefix.  */
            static const int pp_prefix[4] = {
                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
            };
            int vex3, vex2 = x86_ldub_code(env, s);

            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                /*
                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
                 * otherwise the instruction is LES or LDS.
                 */
                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
                break;
            }

            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
                goto illegal_op;
            }
#ifdef TARGET_X86_64
            /* The R/X/B bits are stored inverted in the VEX prefix.  */
            s->rex_r = (~vex2 >> 4) & 8;
#endif
            if (b == 0xc5) {
                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
                vex3 = vex2;
                decode_func = decode_0F;
            } else {
                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
                vex3 = x86_ldub_code(env, s);
#ifdef TARGET_X86_64
                s->rex_x = (~vex2 >> 3) & 8;
                s->rex_b = (~vex2 >> 2) & 8;
#endif
                s->vex_w = (vex3 >> 7) & 1;
                /* The mmmmm field selects the opcode map.  */
                switch (vex2 & 0x1f) {
                case 0x01: /* Implied 0f leading opcode bytes.  */
                    decode_func = decode_0F;
                    break;
                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
                    decode_func = decode_0F38;
                    break;
                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
                    decode_func = decode_0F3A;
                    break;
                default:   /* Reserved for future use.  */
                    goto unknown_op;
                }
            }
            /* vvvv is stored inverted as well; L and pp are not.  */
            s->vex_v = (~vex3 >> 3) & 0xf;
            s->vex_l = (vex3 >> 2) & 1;
            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
        }
        break;
    default:
        /* Values above 0xff denote an opcode from the 0F table.  */
        if (b >= 0x100) {
            b -= 0x100;
            decode_func = do_decode_0F;
        }
        break;
    }

    /* Post-process prefixes.  */
    if (CODE64(s)) {
        /*
         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
         * over 0x66 if both are present.
         */
        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
    } else {
        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
            s->dflag = MO_32;
        } else {
            s->dflag = MO_16;
        }
        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
            s->aflag = MO_32;
        }  else {
            s->aflag = MO_16;
        }
    }

    /*
     * Start from an all-zero decode state.  decode.b is passed by address
     * to the decode functions, so they may update it.
     */
    memset(&decode, 0, sizeof(decode));
    decode.b = b;
    if (!decode_insn(s, env, decode_func, &decode)) {
        goto illegal_op;
    }
    /* A table entry without a generator is an opcode we do not know.  */
    if (!decode.e.gen) {
        goto unknown_op;
    }

    if (!has_cpuid_feature(s, decode.e.cpuid)) {
        goto illegal_op;
    }

    /* Checks that result in #UD come first.  */
    if (decode.e.check) {
        /* i64: invalid in 64-bit code; o64: only valid in 64-bit code.  */
        if (decode.e.check & X86_CHECK_i64) {
            if (CODE64(s)) {
                goto illegal_op;
            }
        }
        if (decode.e.check & X86_CHECK_o64) {
            if (!CODE64(s)) {
                goto illegal_op;
            }
        }
        /* prot: requires protected mode outside vm86.  */
        if (decode.e.check & X86_CHECK_prot) {
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
        }
    }

    /* Per-entry adjustments of the decoded operands.  */
    switch (decode.e.special) {
    case X86_SPECIAL_None:
        break;

    case X86_SPECIAL_Locked:
        /* A memory destination is treated as if LOCK had been given.  */
        if (decode.op[0].has_ea) {
            s->prefix |= PREFIX_LOCK;
        }
        break;

    case X86_SPECIAL_ZExtOp0:
        /* Register (non-memory) operand 0 is forced to 32 bits.  */
        assert(decode.op[0].unit == X86_OP_INT);
        if (!decode.op[0].has_ea) {
            decode.op[0].ot = MO_32;
        }
        break;

    case X86_SPECIAL_ZExtOp2:
        /* Same as above, for operand 2.  */
        assert(decode.op[2].unit == X86_OP_INT);
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = MO_32;
        }
        break;

    case X86_SPECIAL_AVXExtMov:
        /*
         * A register source uses the full vector width; a memory source
         * has its size bumped by one step when VEX.L is set.
         */
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
        } else if (s->vex_l) {
            decode.op[2].ot++;
        }
        break;

    default:
        break;
    }

    /* validate_vex() generates the exception itself when it fails.  */
    if (!validate_vex(s, &decode)) {
        return;
    }

    /*
     * Checks that result in #GP or VMEXIT come second.  Intercepts are
     * generally checked after non-memory exceptions (i.e. before all
     * exceptions if there is no memory operand).  Exceptions are
     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
     *
     * RSM and XSETBV will be handled in the gen_* functions
     * instead of using chk().
     */
    if (decode.e.check & X86_CHECK_cpl0) {
        if (CPL(s) != 0) {
            goto gp_fault;
        }
    }
    if (decode.e.intercept && unlikely(GUEST(s))) {
        gen_helper_svm_check_intercept(tcg_env,
                                       tcg_constant_i32(decode.e.intercept));
    }
    if (decode.e.check) {
        /*
         * In vm86 mode the vm86_iopl check (IOPL must be 3) replaces the
         * cpl_iopl check; the latter is only tried when the former does
         * not apply.
         */
        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
            if (IOPL(s) < 3) {
                goto gp_fault;
            }
        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
            if (IOPL(s) < CPL(s)) {
                goto gp_fault;
            }
        }
    }

    /* Only the unprefixed form of an MMX entry operates on MMX state.  */
    if (decode.e.special == X86_SPECIAL_MMX &&
        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
        gen_helper_enter_mmx(tcg_env);
    }

    /* The last argument is true for VEX class 12 (VSIB addressing).  */
    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
    }
    if (s->prefix & PREFIX_LOCK) {
        /* LOCK is only accepted with an integer memory destination.  */
        if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
            goto illegal_op;
        }
        /*
         * Only operand 2 is loaded and nothing is written back here;
         * presumably the gen function does the whole read-modify-write
         * on the destination atomically -- TODO confirm.
         */
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
    } else {
        /* Vector destinations need their register offset computed first.  */
        if (decode.op[0].unit == X86_OP_MMX) {
            compute_mmx_offset(&decode.op[0]);
        } else if (decode.op[0].unit == X86_OP_SSE) {
            compute_xmm_offset(&decode.op[0]);
        }
        /* Operand 1 goes to T0 and operand 2 to T1; T0 is written back.  */
        gen_load(s, &decode, 1, s->T0);
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
        gen_writeback(s, &decode, 0, s->T0);
    }
    return;
 gp_fault:
    gen_exception_gpf(s);
    return;
 illegal_op:
    gen_illegal_opcode(s);
    return;
 unknown_op:
    gen_unknown_opcode(env, s);
}
1969