xref: /openbmc/qemu/target/i386/tcg/decode-new.c.inc (revision c1774bdb)
1/*
2 * New-style decoder for i386 instructions
3 *
4 *  Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM.  As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
26 * size (X86_SIZE_*) codes used in the manual.
27 *
28 * The main difference is that the V, U and W types are extended to
29 * cover MMX as well; if an instruction is like
30 *
31 *      por   Pq, Qq
32 *  66  por   Vx, Hx, Wx
33 *
34 * only the second row is included and the instruction is marked as a
35 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
36 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
37 * "x" to "q" if there is no prefix.
38 *
39 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
40 * if the difference is expressed via prefixes.  Individual instructions
41 * are separated by prefix in the generator functions.
42 *
 * There are a couple of cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
46 */
47
/* Initializer for an all-zero table entry; note that it includes the trailing comma. */
#define X86_OP_NONE { 0 },

/*
 * Initializer for an entry that is resolved by a secondary decoder.
 * .decode is set to the function named decode_<op> and .is_decode to true.
 * Each operand type is pasted onto X86_TYPE_ and each size onto X86_SIZE_.
 * Any further arguments are appended verbatim after the fixed fields; they
 * are expected to be designated initializers that end in a comma, like the
 * flag macros defined later in this file.
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

/*
 * Two-operand group: operand 1 gets type "2op" with the size of operand 0,
 * and the second listed operand becomes operand 2.
 * NOTE(review): X86_TYPE_2op is defined elsewhere; presumably it means
 * "same as operand 0" -- confirm against the operand decoder.
 */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Group with no operands described in the table (all types and sizes None). */
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
66
/*
 * Initializer for an entry that maps directly to a generator function.
 * .gen is set to the function named gen_<op>; operand types and sizes are
 * pasted onto X86_TYPE_ / X86_SIZE_ exactly as in X86_OP_GROUP3.  Trailing
 * arguments are appended verbatim after the fixed fields.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

/*
 * Same as X86_OP_ENTRY3, plus a fourth operand that is always a byte-sized
 * immediate (.op3 = X86_TYPE_I, .s3 = X86_SIZE_b).
 */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/*
 * Shorthands built on X86_OP_ENTRY3:
 *   ENTRY2 - operand 1 is type "2op" with operand 0's size, the second
 *            listed operand becomes operand 2
 *   ENTRYw - only operand 0 is described (operands 1 and 2 are None)
 *   ENTRYr - only operand 2 is described (operands 0 and 1 are None)
 *   ENTRY0 - no operands described
 * NOTE(review): the w/r suffixes presumably stand for "write-only" and
 * "read-only" operand; confirm against the load/writeback code.
 */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
91
/*
 * Flag macros for the variadic tail of X86_OP_ENTRY* / X86_OP_GROUP*.
 * Each one expands to one or more designated initializers followed by a
 * comma, which is why table entries list them separated only by spaces
 * (e.g. "vex4 cpuid(SSE41) avx2_256 p_66").
 */

/* Sets .cpuid to X86_FEAT_<feat>. */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
/*
 * The following all assign the same .special field, so an entry can
 * meaningfully carry only one of them.
 */
#define i64 .special = X86_SPECIAL_i64,
#define o64 .special = X86_SPECIAL_o64,
#define xchg .special = X86_SPECIAL_Locked,
#define mmx .special = X86_SPECIAL_MMX,
#define zext0 .special = X86_SPECIAL_ZExtOp0,
#define zext2 .special = X86_SPECIAL_ZExtOp2,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,

/*
 * vexN sets .vex_class to N.  The suffixed variants additionally set
 * .vex_special: *_rep3 / *_rep5 to X86_VEX_REPScalar and vex4_unal to
 * X86_VEX_SSEUnaligned.
 * NOTE(review): the class numbers presumably follow the exception type
 * numbering of the Intel SDM; confirm against the code that checks them.
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

/*
 * Sets .vex_special to X86_VEX_AVX2_256.  This is the same field written by
 * the *_rep* and vex4_unal macros above.
 */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
118
/*
 * Bit masks stored in .valid_prefix.  Bit 0 stands for "no prefix"; the
 * 66, F3 and F2 bits are placed at positions given by the PREFIX_DATA,
 * PREFIX_REPZ and PREFIX_REPNZ constants, which are defined elsewhere.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * Flag macros that set .valid_prefix to the set of prefixes named after
 * "p_".
 * NOTE(review): the check against .valid_prefix is not in this part of the
 * file; presumably prefixes outside the set make the encoding invalid.
 */
#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
134
135static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
136{
137    if (!s->has_modrm) {
138        s->modrm = x86_ldub_code(env, s);
139        s->has_modrm = true;
140    }
141    return s->modrm;
142}
143
144static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
145{
146    if (s->prefix & PREFIX_REPNZ) {
147        return &entries[3];
148    } else if (s->prefix & PREFIX_REPZ) {
149        return &entries[2];
150    } else if (s->prefix & PREFIX_DATA) {
151        return &entries[1];
152    } else {
153        return &entries[0];
154    }
155}
156
157static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
158{
159    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
160    static const X86OpEntry group15_reg[8] = {
161    };
162
163    static const X86OpEntry group15_mem[8] = {
164        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5),
165        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5),
166    };
167
168    uint8_t modrm = get_modrm(s, env);
169    if ((modrm >> 6) == 3) {
170        *entry = group15_reg[(modrm >> 3) & 7];
171    } else {
172        *entry = group15_mem[(modrm >> 3) & 7];
173    }
174}
175
176static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
177{
178    static const X86GenFunc group17_gen[8] = {
179        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
180    };
181    int op = (get_modrm(s, env) >> 3) & 7;
182    entry->gen = group17_gen[op];
183}
184
185static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
186{
187    static const X86OpEntry opcodes_group12[8] = {
188        {},
189        {},
190        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
191        {},
192        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
193        {},
194        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
195        {},
196    };
197
198    int op = (get_modrm(s, env) >> 3) & 7;
199    *entry = opcodes_group12[op];
200}
201
202static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
203{
204    static const X86OpEntry opcodes_group13[8] = {
205        {},
206        {},
207        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
208        {},
209        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
210        {},
211        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
212        {},
213    };
214
215    int op = (get_modrm(s, env) >> 3) & 7;
216    *entry = opcodes_group13[op];
217}
218
219static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
220{
221    static const X86OpEntry opcodes_group14[8] = {
222        /* grp14 */
223        {},
224        {},
225        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
226        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
227        {},
228        {},
229        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
230        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
231    };
232
233    int op = (get_modrm(s, env) >> 3) & 7;
234    *entry = opcodes_group14[op];
235}
236
237static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
238{
239    static const X86OpEntry opcodes_0F6F[4] = {
240        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
241        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
242        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
243        {},
244    };
245    *entry = *decode_by_prefix(s, opcodes_0F6F);
246}
247
248static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
249{
250    static const X86OpEntry pshufw[4] = {
251        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
252        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
253        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
254        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
255    };
256
257    *entry = *decode_by_prefix(s, pshufw);
258}
259
260static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
261{
262    if (!(s->prefix & PREFIX_VEX)) {
263        entry->gen = gen_EMMS;
264    } else if (!s->vex_l) {
265        entry->gen = gen_VZEROUPPER;
266        entry->vex_class = 8;
267    } else {
268        entry->gen = gen_VZEROALL;
269        entry->vex_class = 8;
270    }
271}
272
273static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
274{
275    static const X86OpEntry opcodes_0F78[4] = {
276        {},
277        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
278        {},
279        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
280    };
281    *entry = *decode_by_prefix(s, opcodes_0F78);
282}
283
284static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
285{
286    if (s->prefix & PREFIX_REPNZ) {
287        entry->gen = gen_INSERTQ_r; /* AMD extension */
288    } else if (s->prefix & PREFIX_DATA) {
289        entry->gen = gen_EXTRQ_r; /* AMD extension */
290    } else {
291        entry->gen = NULL;
292    };
293}
294
295static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
296{
297    static const X86OpEntry opcodes_0F7E[4] = {
298        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
299        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
300        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
301        {},
302    };
303    *entry = *decode_by_prefix(s, opcodes_0F7E);
304}
305
306static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
307{
308    static const X86OpEntry opcodes_0F7F[4] = {
309        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
310        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
311        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
312        {},
313    };
314    *entry = *decode_by_prefix(s, opcodes_0F7F);
315}
316
317static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
318{
319    static const X86OpEntry movq[4] = {
320        {},
321        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
322        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
323        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
324    };
325
326    *entry = *decode_by_prefix(s, movq);
327}
328
329static const X86OpEntry opcodes_0F38_00toEF[240] = {
330    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
331    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
332    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
333    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
334    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
335    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
336    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
337    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
338
339    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
340    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 cpuid(F16C) p_66),
341    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
342    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
343    /* Listed incorrectly as type 4 */
344    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
345    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),
346
347    /*
348     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
349     * as 128-bit only in 2-17.
350     */
351    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
352    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
353    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
354    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
355    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
356    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
357
358    /* Same as PMOVSX.  */
359    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
360    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
361    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
362    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
363    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
364    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
365    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
366    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),
367
368    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
369    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
370    /* Listed incorrectly as type 4 */
371    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
372    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
373    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
374
375    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
376    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
377    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
378    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
379
380    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
381    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
382    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
383
384    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
385    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
386
387    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
388    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
389
390    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
391    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
392    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
393    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
394    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_00_66),
395    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_66),
396    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
397    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
398
399    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX) p_66), /* vbroadcastss */
400    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 cpuid(AVX) p_66), /* vbroadcastsd */
401    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX) p_66),
402    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
403    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
404    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
405
406    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
407    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
408    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
409    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
410    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
411    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
412    /* Incorrectly listed as Mx,Hx,Vx in the manual */
413    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
414    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
415
416    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
417    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
418    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
419    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
420    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
421    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
422    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
423    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
424
425    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX2) p_66),
426    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 cpuid(AVX2) p_66),
427    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX2) p_66),
428
429    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 cpuid(AVX2) p_66),
430    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 cpuid(AVX2) p_66),
431
432    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
433    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),
434
435    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
436    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
437    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
438    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
439    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
440    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
441    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
442    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
443    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
444
445    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
446    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
447    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
448    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
449    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
450    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
451    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
452    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
453
454    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
455    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
456    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
457    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
458    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
459    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
460    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
461    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
462
463    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
464    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
465    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
466    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
467    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
468};
469
470/* five rows for no prefix, 66, F3, F2, 66+F2  */
471static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
472    [0] = {
473        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
474        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
475        {},
476        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
477        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
478    },
479    [1] = {
480        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
481        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
482        {},
483        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
484        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
485    },
486    [2] = {
487        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
488        {},
489        {},
490        {},
491        {},
492    },
493    [3] = {
494        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
495        {},
496        {},
497        {},
498        {},
499    },
500    [5] = {
501        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
502        {},
503        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
504        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
505        {},
506    },
507    [6] = {
508        {},
509        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
510        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
511        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
512        {},
513    },
514    [7] = {
515        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
516        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
517        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
518        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
519        {},
520    },
521};
522
523static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
524{
525    *b = x86_ldub_code(env, s);
526    if (*b < 0xf0) {
527        *entry = opcodes_0F38_00toEF[*b];
528    } else {
529        int row = 0;
530        if (s->prefix & PREFIX_REPZ) {
531            /* The REPZ (F3) prefix has priority over 66 */
532            row = 2;
533        } else {
534            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
535            row += s->prefix & PREFIX_DATA ? 1 : 0;
536        }
537        *entry = opcodes_0F38_F0toFF[*b & 15][row];
538    }
539}
540
541static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
542{
543    static const X86OpEntry
544        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
545        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
546
547    int modrm = get_modrm(s, env);
548    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
549}
550
/*
 * Entries for the three-byte opcodes 0F 3A xx, indexed by the last opcode
 * byte (see decode_0F3A()).  Slots that are not listed are all-zero.
 *
 * Entries written with X86_OP_ENTRY4 take a byte immediate as an implicit
 * fourth operand; those written with X86_OP_ENTRY3 name the immediate
 * explicitly as I,b.  The entries are kept in the order of the printed
 * opcode map (low half of each row first, then the high half).
 */
static const X86OpEntry opcodes_0F3A[256] = {
    /*
     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
     * only.
     */
    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66),
    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66), /* VPERMPD */
    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 cpuid(AVX2) p_66), /* VPBLENDD */
    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),

    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 cpuid(F16C) p_66),

    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) zext2 p_66),
    /* Resolved by decode_VINSERTPS(), which supplies the complete entry. */
    [0x21] = X86_OP_GROUP0(VINSERTPS),
    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),

    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),

    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),

    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    /*
     * Not listed as four operand in the manual.  Also writes and reads 128-bits
     * from the first two operands due to the V operand picking higher entries of
     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
     * value of vex_special, because the table lists the operand types of VSQRTPx.
     */
    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),
    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),
    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX2) p_66),

    /* Listed incorrectly as type 4 */
    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66 avx2_256),

    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),

    /* The only entry in this table that requires the F2 prefix. */
    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};
616
617static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
618{
619    *b = x86_ldub_code(env, s);
620    *entry = opcodes_0F3A[*b];
621}
622
623/*
624 * There are some mistakes in the operands in the manual, and the load/store/register
625 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
626 * efficiency of implementation rather than copying what the manual says.
627 *
628 * In particular:
629 *
630 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
631 * but this is not mentioned in the tables.
632 *
633 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
634 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
635 * quadword of the V operand.
636 */
637static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
638{
639    static const X86OpEntry opcodes_0F10_reg[4] = {
640        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
641        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
642        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
643        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
644    };
645
646    static const X86OpEntry opcodes_0F10_mem[4] = {
647        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
648        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
649        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
650        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
651    };
652
653    if ((get_modrm(s, env) >> 6) == 3) {
654        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
655    } else {
656        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
657    }
658}
659
660static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
661{
662    static const X86OpEntry opcodes_0F11_reg[4] = {
663        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
664        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
665        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
666        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
667    };
668
669    static const X86OpEntry opcodes_0F11_mem[4] = {
670        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
671        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
672        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
673        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
674    };
675
676    if ((get_modrm(s, env) >> 6) == 3) {
677        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
678    } else {
679        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
680    }
681}
682
683static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
684{
685    static const X86OpEntry opcodes_0F12_mem[4] = {
686        /*
687         * Use dq for operand for compatibility with gen_MOVSD and
688         * to allow VEX128 only.
689         */
690        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
691        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
692        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
693        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
694    };
695    static const X86OpEntry opcodes_0F12_reg[4] = {
696        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
697        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
698        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
699        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
700    };
701
702    if ((get_modrm(s, env) >> 6) == 3) {
703        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
704    } else {
705        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
706        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
707            entry->s2 = X86_SIZE_qq;
708        }
709    }
710}
711
712static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
713{
714    static const X86OpEntry opcodes_0F16_mem[4] = {
715        /*
716         * Operand 1 technically only reads the low 64 bits, but uses dq so that
717         * it is easier to check for op0 == op1 in an endianness-neutral manner.
718         */
719        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
720        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
721        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
722        {},
723    };
724    static const X86OpEntry opcodes_0F16_reg[4] = {
725        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
726        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
727        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
728        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
729        {},
730    };
731
732    if ((get_modrm(s, env) >> 6) == 3) {
733        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
734    } else {
735        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
736    }
737}
738
739static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
740{
741    static const X86OpEntry opcodes_0F2A[4] = {
742        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
743        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
744        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
745        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
746    };
747    *entry = *decode_by_prefix(s, opcodes_0F2A);
748}
749
750static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
751{
752    static const X86OpEntry opcodes_0F2B[4] = {
753        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
754        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
755        /* AMD extensions */
756        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
757        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
758    };
759
760    *entry = *decode_by_prefix(s, opcodes_0F2B);
761}
762
763static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
764{
765    static const X86OpEntry opcodes_0F2C[4] = {
766        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
767        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
768        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
769        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
770        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
771    };
772    *entry = *decode_by_prefix(s, opcodes_0F2C);
773}
774
775static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
776{
777    static const X86OpEntry opcodes_0F2D[4] = {
778        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
779        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
780        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
781        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
782        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
783    };
784    *entry = *decode_by_prefix(s, opcodes_0F2D);
785}
786
787static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
788{
789    /*
790     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
791     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
792     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
793     */
794    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
795    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
796}
797
798static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
799{
800    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
801        entry->op1 = X86_TYPE_None;
802        entry->s1 = X86_SIZE_None;
803    }
804    switch (*b) {
805    case 0x51: entry->gen = gen_VSQRT; break;
806    case 0x52: entry->gen = gen_VRSQRT; break;
807    case 0x53: entry->gen = gen_VRCP; break;
808    }
809}
810
811static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
812{
813    static const X86OpEntry opcodes_0F5A[4] = {
814        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
815        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
816        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
817        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
818    };
819    *entry = *decode_by_prefix(s, opcodes_0F5A);
820}
821
822static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
823{
824    static const X86OpEntry opcodes_0F5B[4] = {
825        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
826        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
827        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
828        {},
829    };
830    *entry = *decode_by_prefix(s, opcodes_0F5B);
831}
832
833static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
834{
835    static const X86OpEntry opcodes_0FE6[4] = {
836        {},
837        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
838        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
839        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
840    };
841    *entry = *decode_by_prefix(s, opcodes_0FE6);
842}
843
/*
 * Table for the opcodes that follow the 0x0F escape byte, indexed by the
 * opcode byte (see do_decode_0F).  Opcodes without an initializer are left
 * zero-initialized.
 *
 * Note that the rows are grouped in blocks of eight opcodes and are not
 * in strictly ascending order (e.g. 0x28-0x2F come after 0x70-0x77, and
 * 0xD8-0xDF after 0xF0-0xF7); keep this in mind when searching for an entry.
 * X86_OP_GROUP* entries name a decode_* function that refines the entry,
 * X86_OP_ENTRY* entries describe the operands directly.
 */
static const X86OpEntry opcodes_0F[256] = {
    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
    /*
     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
     * function.
     */
    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),

    [0x10] = X86_OP_GROUP0(0F10),
    [0x11] = X86_OP_GROUP0(0F11),
    [0x12] = X86_OP_GROUP0(0F12),
    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x16] = X86_OP_GROUP0(0F16),
    /* Incorrectly listed as Mq,Vq in the manual */
    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),

    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */

    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0x70] = X86_OP_GROUP0(0F70),
    [0x71] = X86_OP_GROUP0(group12),
    [0x72] = X86_OP_GROUP0(group13),
    [0x73] = X86_OP_GROUP0(group14),
    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x77] = X86_OP_GROUP0(0F77),

    /* Second half of each row of sixteen opcodes (0x_8 to 0x_F) starts here. */
    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
    [0x2A] = X86_OP_GROUP0(0F2A),
    [0x2B] = X86_OP_GROUP0(0F2B),
    [0x2C] = X86_OP_GROUP0(0F2C),
    [0x2D] = X86_OP_GROUP0(0F2D),
    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */

    /* Escapes to the three-byte opcode tables. */
    [0x38] = X86_OP_GROUP0(0F38),
    [0x3a] = X86_OP_GROUP0(0F3A),

    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5a] = X86_OP_GROUP0(0F5A),
    [0x5b] = X86_OP_GROUP0(0F5B),
    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),

    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
    [0x6f] = X86_OP_GROUP0(0F6F),

    [0x78] = X86_OP_GROUP0(0F78),
    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7e] = X86_OP_GROUP0(0F7E),
    [0x7f] = X86_OP_GROUP0(0F7F),

    [0xae] = X86_OP_GROUP0(group15),

    /* Four-operand entries: the last operand is an immediate byte (see decode_insn). */
    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),

    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd6] = X86_OP_GROUP0(0FD6),
    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),

    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe6] = X86_OP_GROUP0(0FE6),
    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */

    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),

    /* Incorrectly missing from 2-17 */
    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    /* 0xff = UD0 */
};
989
990static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
991{
992    *entry = opcodes_0F[*b];
993}
994
995static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
996{
997    *b = x86_ldub_code(env, s);
998    do_decode_0F(s, env, entry, b);
999}
1000
/*
 * Table for the first opcode byte, indexed by that byte (see decode_root).
 * Only the 0x0F escape is populated here; every other slot is left
 * zero-initialized.
 */
static const X86OpEntry opcodes_root[256] = {
    [0x0F] = X86_OP_GROUP0(0F),
};
1004
/*
 * The opcode tables are complete: drop the short macro names that were
 * used inside the table initializers above.
 */
#undef mmx
#undef vex1
#undef vex2
#undef vex3
#undef vex4
#undef vex4_unal
#undef vex5
#undef vex6
#undef vex7
#undef vex8
#undef vex11
#undef vex12
#undef vex13
1018
1019/*
1020 * Decode the fixed part of the opcode and place the last
1021 * in b.
1022 */
1023static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1024{
1025    *entry = opcodes_root[*b];
1026}
1027
1028
1029static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1030                        X86DecodedOp *op, X86OpType type)
1031{
1032    int modrm = get_modrm(s, env);
1033    if ((modrm >> 6) == 3) {
1034        if (s->prefix & PREFIX_LOCK) {
1035            decode->e.gen = gen_illegal;
1036            return 0xff;
1037        }
1038        op->n = (modrm & 7);
1039        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1040            op->n |= REX_B(s);
1041        }
1042    } else {
1043        op->has_ea = true;
1044        op->n = -1;
1045        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1046    }
1047    return modrm;
1048}
1049
1050static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1051{
1052    switch (size) {
1053    case X86_SIZE_b:  /* byte */
1054        *ot = MO_8;
1055        return true;
1056
1057    case X86_SIZE_d:  /* 32-bit */
1058    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1059        *ot = MO_32;
1060        return true;
1061
1062    case X86_SIZE_p:  /* Far pointer, return offset size */
1063    case X86_SIZE_s:  /* Descriptor, return offset size */
1064    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1065        *ot = s->dflag;
1066        return true;
1067
1068    case X86_SIZE_pi: /* MMX */
1069    case X86_SIZE_q:  /* 64-bit */
1070    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1071        *ot = MO_64;
1072        return true;
1073
1074    case X86_SIZE_w:  /* 16-bit */
1075        *ot = MO_16;
1076        return true;
1077
1078    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1079        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1080        return true;
1081
1082    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1083        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1084        return true;
1085
1086    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1087        if (e->special == X86_SPECIAL_MMX &&
1088            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1089            *ot = MO_64;
1090            return true;
1091        }
1092        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1093            return false;
1094        }
1095        *ot = MO_128;
1096        return true;
1097
1098    case X86_SIZE_qq: /* AVX 256-bit */
1099        if (!s->vex_l) {
1100            return false;
1101        }
1102        *ot = MO_256;
1103        return true;
1104
1105    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1106        if (e->special == X86_SPECIAL_MMX &&
1107            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1108            *ot = MO_64;
1109            return true;
1110        }
1111        /* fall through */
1112    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1113    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1114        *ot = s->vex_l ? MO_256 : MO_128;
1115        return true;
1116
1117    case X86_SIZE_xh: /* SSE/AVX packed half register */
1118        *ot = s->vex_l ? MO_128 : MO_64;
1119        return true;
1120
1121    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1122        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1123        return true;
1124
1125    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1126        *ot = CODE64(s) ? MO_64 : s->dflag;
1127        return true;
1128
1129    default:
1130        *ot = -1;
1131        return true;
1132    }
1133}
1134
1135static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1136                      X86DecodedOp *op, X86OpType type, int b)
1137{
1138    int modrm;
1139
1140    switch (type) {
1141    case X86_TYPE_None:  /* Implicit or absent */
1142    case X86_TYPE_A:  /* Implicit */
1143    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1144        break;
1145
1146    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1147        op->unit = X86_OP_INT;
1148        op->n = s->vex_v;
1149        break;
1150
1151    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1152        op->unit = X86_OP_CR;
1153        goto get_reg;
1154
1155    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1156        op->unit = X86_OP_DR;
1157        goto get_reg;
1158
1159    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1160        op->unit = X86_OP_INT;
1161        goto get_reg;
1162
1163    case X86_TYPE_S:  /* reg selects a segment register */
1164        op->unit = X86_OP_SEG;
1165        goto get_reg;
1166
1167    case X86_TYPE_P:
1168        op->unit = X86_OP_MMX;
1169        goto get_reg;
1170
1171    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1172        if (decode->e.special == X86_SPECIAL_MMX &&
1173            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1174            op->unit = X86_OP_MMX;
1175        } else {
1176            op->unit = X86_OP_SSE;
1177        }
1178    get_reg:
1179        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1180        break;
1181
1182    case X86_TYPE_E:  /* ALU modrm operand */
1183        op->unit = X86_OP_INT;
1184        goto get_modrm;
1185
1186    case X86_TYPE_Q:  /* MMX modrm operand */
1187        op->unit = X86_OP_MMX;
1188        goto get_modrm;
1189
1190    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1191        if (decode->e.special == X86_SPECIAL_MMX &&
1192            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1193            op->unit = X86_OP_MMX;
1194        } else {
1195            op->unit = X86_OP_SSE;
1196        }
1197        goto get_modrm;
1198
1199    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1200        op->unit = X86_OP_MMX;
1201        goto get_modrm_reg;
1202
1203    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1204        if (decode->e.special == X86_SPECIAL_MMX &&
1205            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1206            op->unit = X86_OP_MMX;
1207        } else {
1208            op->unit = X86_OP_SSE;
1209        }
1210        goto get_modrm_reg;
1211
1212    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1213        op->unit = X86_OP_INT;
1214    get_modrm_reg:
1215        modrm = get_modrm(s, env);
1216        if ((modrm >> 6) != 3) {
1217            return false;
1218        }
1219        goto get_modrm;
1220
1221    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1222        op->unit = X86_OP_SSE;
1223        /* fall through */
1224    case X86_TYPE_M:  /* modrm byte selects a memory operand */
1225        modrm = get_modrm(s, env);
1226        if ((modrm >> 6) == 3) {
1227            return false;
1228        }
1229    get_modrm:
1230        decode_modrm(s, env, decode, op, type);
1231        break;
1232
1233    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1234        op->unit = X86_OP_INT;
1235        op->has_ea = true;
1236        op->n = -1;
1237        decode->mem = (AddressParts) {
1238            .def_seg = R_DS,
1239            .base = -1,
1240            .index = -1,
1241            .disp = insn_get_addr(env, s, s->aflag)
1242        };
1243        break;
1244
1245    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1246        if ((s->prefix & PREFIX_VEX)) {
1247            op->unit = X86_OP_SSE;
1248            op->n = s->vex_v;
1249            break;
1250        }
1251        if (op == &decode->op[0]) {
1252            /* shifts place the destination in VEX.vvvv, use modrm */
1253            return decode_op(s, env, decode, op, decode->e.op1, b);
1254        } else {
1255            return decode_op(s, env, decode, op, decode->e.op0, b);
1256        }
1257
1258    case X86_TYPE_I:  /* Immediate */
1259        op->unit = X86_OP_IMM;
1260        decode->immediate = insn_get_signed(env, s, op->ot);
1261        break;
1262
1263    case X86_TYPE_J:  /* Relative offset for a jump */
1264        op->unit = X86_OP_IMM;
1265        decode->immediate = insn_get_signed(env, s, op->ot);
1266        decode->immediate += s->pc - s->cs_base;
1267        if (s->dflag == MO_16) {
1268            decode->immediate &= 0xffff;
1269        } else if (!CODE64(s)) {
1270            decode->immediate &= 0xffffffffu;
1271        }
1272        break;
1273
1274    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1275        op->n = insn_get(env, s, op->ot) >> 4;
1276        break;
1277
1278    case X86_TYPE_X:  /* string source */
1279        op->n = -1;
1280        decode->mem = (AddressParts) {
1281            .def_seg = R_DS,
1282            .base = R_ESI,
1283            .index = -1,
1284        };
1285        break;
1286
1287    case X86_TYPE_Y:  /* string destination */
1288        op->n = -1;
1289        decode->mem = (AddressParts) {
1290            .def_seg = R_ES,
1291            .base = R_EDI,
1292            .index = -1,
1293        };
1294        break;
1295
1296    case X86_TYPE_2op:
1297        *op = decode->op[0];
1298        break;
1299
1300    case X86_TYPE_LoBits:
1301        op->n = (b & 7) | REX_B(s);
1302        op->unit = X86_OP_INT;
1303        break;
1304
1305    case X86_TYPE_0 ... X86_TYPE_7:
1306        op->n = type - X86_TYPE_0;
1307        op->unit = X86_OP_INT;
1308        break;
1309
1310    case X86_TYPE_ES ... X86_TYPE_GS:
1311        op->n = type - X86_TYPE_ES;
1312        op->unit = X86_OP_SEG;
1313        break;
1314    }
1315
1316    return true;
1317}
1318
1319static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1320{
1321    uint16_t sse_prefixes;
1322
1323    if (!e->valid_prefix) {
1324        return true;
1325    }
1326    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1327        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1328        s->prefix &= ~PREFIX_DATA;
1329    }
1330
1331    /* Now, either zero or one bit is set in sse_prefixes.  */
1332    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1333    return e->valid_prefix & (1 << sse_prefixes);
1334}
1335
1336static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1337                        X86DecodedInsn *decode)
1338{
1339    X86OpEntry *e = &decode->e;
1340
1341    decode_func(s, env, e, &decode->b);
1342    while (e->is_decode) {
1343        e->is_decode = false;
1344        e->decode(s, env, e, &decode->b);
1345    }
1346
1347    if (!validate_sse_prefix(s, e)) {
1348        return false;
1349    }
1350
1351    /* First compute size of operands in order to initialize s->rip_offset.  */
1352    if (e->op0 != X86_TYPE_None) {
1353        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1354            return false;
1355        }
1356        if (e->op0 == X86_TYPE_I) {
1357            s->rip_offset += 1 << decode->op[0].ot;
1358        }
1359    }
1360    if (e->op1 != X86_TYPE_None) {
1361        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1362            return false;
1363        }
1364        if (e->op1 == X86_TYPE_I) {
1365            s->rip_offset += 1 << decode->op[1].ot;
1366        }
1367    }
1368    if (e->op2 != X86_TYPE_None) {
1369        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1370            return false;
1371        }
1372        if (e->op2 == X86_TYPE_I) {
1373            s->rip_offset += 1 << decode->op[2].ot;
1374        }
1375    }
1376    if (e->op3 != X86_TYPE_None) {
1377        /*
1378         * A couple instructions actually use the extra immediate byte for an Lx
1379         * register operand; those are handled in the gen_* functions as one off.
1380         */
1381        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1382        s->rip_offset += 1;
1383    }
1384
1385    if (e->op0 != X86_TYPE_None &&
1386        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1387        return false;
1388    }
1389
1390    if (e->op1 != X86_TYPE_None &&
1391        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1392        return false;
1393    }
1394
1395    if (e->op2 != X86_TYPE_None &&
1396        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
1397        return false;
1398    }
1399
1400    if (e->op3 != X86_TYPE_None) {
1401        decode->immediate = insn_get_signed(env, s, MO_8);
1402    }
1403
1404    return true;
1405}
1406
1407static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
1408{
1409    switch (cpuid) {
1410    case X86_FEAT_None:
1411        return true;
1412    case X86_FEAT_F16C:
1413        return (s->cpuid_ext_features & CPUID_EXT_F16C);
1414    case X86_FEAT_FMA:
1415        return (s->cpuid_ext_features & CPUID_EXT_FMA);
1416    case X86_FEAT_MOVBE:
1417        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
1418    case X86_FEAT_PCLMULQDQ:
1419        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
1420    case X86_FEAT_SSE:
1421        return (s->cpuid_ext_features & CPUID_SSE);
1422    case X86_FEAT_SSE2:
1423        return (s->cpuid_ext_features & CPUID_SSE2);
1424    case X86_FEAT_SSE3:
1425        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
1426    case X86_FEAT_SSSE3:
1427        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
1428    case X86_FEAT_SSE41:
1429        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
1430    case X86_FEAT_SSE42:
1431        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
1432    case X86_FEAT_AES:
1433        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
1434            return false;
1435        } else if (!(s->prefix & PREFIX_VEX)) {
1436            return true;
1437        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
1438            return false;
1439        } else {
1440            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
1441        }
1442
1443    case X86_FEAT_AVX:
1444        return (s->cpuid_ext_features & CPUID_EXT_AVX);
1445
1446    case X86_FEAT_3DNOW:
1447        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
1448    case X86_FEAT_SSE4A:
1449        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
1450
1451    case X86_FEAT_ADX:
1452        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
1453    case X86_FEAT_BMI1:
1454        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
1455    case X86_FEAT_BMI2:
1456        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
1457    case X86_FEAT_AVX2:
1458        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
1459    }
1460    g_assert_not_reached();
1461}
1462
1463static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
1464{
1465    X86OpEntry *e = &decode->e;
1466
1467    switch (e->vex_special) {
1468    case X86_VEX_REPScalar:
1469        /*
1470         * Instructions which differ between 00/66 and F2/F3 in the
1471         * exception classification and the size of the memory operand.
1472         */
1473        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
1474        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1475            e->vex_class = e->vex_class < 4 ? 3 : 5;
1476            if (s->vex_l) {
1477                goto illegal;
1478            }
1479            assert(decode->e.s2 == X86_SIZE_x);
1480            if (decode->op[2].has_ea) {
1481                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
1482            }
1483        }
1484        break;
1485
1486    case X86_VEX_SSEUnaligned:
1487        /* handled in sse_needs_alignment.  */
1488        break;
1489
1490    case X86_VEX_AVX2_256:
1491        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
1492            goto illegal;
1493        }
1494    }
1495
1496    /* TODO: instructions that require VEX.W=0 (Table 2-16) */
1497
1498    switch (e->vex_class) {
1499    case 0:
1500        if (s->prefix & PREFIX_VEX) {
1501            goto illegal;
1502        }
1503        return true;
1504    case 1:
1505    case 2:
1506    case 3:
1507    case 4:
1508    case 5:
1509    case 7:
1510        if (s->prefix & PREFIX_VEX) {
1511            if (!(s->flags & HF_AVX_EN_MASK)) {
1512                goto illegal;
1513            }
1514        } else if (e->special != X86_SPECIAL_MMX ||
1515                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
1516            if (!(s->flags & HF_OSFXSR_MASK)) {
1517                goto illegal;
1518            }
1519        }
1520        break;
1521    case 12:
1522        /* Must have a VSIB byte and no address prefix.  */
1523        assert(s->has_modrm);
1524        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
1525            goto illegal;
1526        }
1527
1528        /* Check no overlap between registers.  */
1529        if (!decode->op[0].has_ea &&
1530            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
1531            goto illegal;
1532        }
1533        assert(!decode->op[1].has_ea);
1534        if (decode->op[1].n == decode->mem.index) {
1535            goto illegal;
1536        }
1537        if (!decode->op[2].has_ea &&
1538            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
1539            goto illegal;
1540        }
1541        /* fall through */
1542    case 6:
1543    case 11:
1544        if (!(s->prefix & PREFIX_VEX)) {
1545            goto illegal;
1546        }
1547        if (!(s->flags & HF_AVX_EN_MASK)) {
1548            goto illegal;
1549        }
1550        break;
1551    case 8:
1552        /* Non-VEX case handled in decode_0F77.  */
1553        assert(s->prefix & PREFIX_VEX);
1554        if (!(s->flags & HF_AVX_EN_MASK)) {
1555            goto illegal;
1556        }
1557        break;
1558    case 13:
1559        if (!(s->prefix & PREFIX_VEX)) {
1560            goto illegal;
1561        }
1562        if (s->vex_l) {
1563            goto illegal;
1564        }
1565        /* All integer instructions use VEX.vvvv, so exit.  */
1566        return true;
1567    }
1568
1569    if (s->vex_v != 0 &&
1570        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
1571        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
1572        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
1573        goto illegal;
1574    }
1575
1576    if (s->flags & HF_TS_MASK) {
1577        goto nm_exception;
1578    }
1579    if (s->flags & HF_EM_MASK) {
1580        goto illegal;
1581    }
1582    return true;
1583
1584nm_exception:
1585    gen_NM_exception(s);
1586    return false;
1587illegal:
1588    gen_illegal_opcode(s);
1589    return false;
1590}
1591
/*
 * Convert one instruction. s->base.is_jmp is set if the translation must
 * be stopped.
 *
 * @s:   translation context.  Its prefix, override, REX/VEX, dflag and
 *       aflag fields are updated while the prefixes are collected.
 * @cpu: CPU whose environment is used to fetch further code bytes.
 * @b:   first byte of the instruction, already fetched by the caller.
 *       Values of 0x100 and above are dispatched to do_decode_0F after
 *       subtracting 0x100.
 *
 * The function collects prefixes, decodes the instruction through the
 * tables, applies the per-entry "special" and VEX checks, and finally
 * emits operand loads, the instruction body and the writeback.  Failures
 * end in gen_illegal_opcode() or gen_unknown_opcode().
 */
static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
{
    CPUX86State *env = cpu_env(cpu);
    /* The first byte comes from the caller; later ones are fetched here. */
    bool first = true;
    X86DecodedInsn decode;
    X86DecodeFunc decode_func = decode_root;

    s->has_modrm = false;

 next_byte:
    if (first) {
        first = false;
    } else {
        b = x86_ldub_code(env, s);
    }
    /* Collect prefixes.  */
    switch (b) {
    /* F3 and F2 are mutually exclusive: the last one seen wins. */
    case 0xf3:
        s->prefix |= PREFIX_REPZ;
        s->prefix &= ~PREFIX_REPNZ;
        goto next_byte;
    case 0xf2:
        s->prefix |= PREFIX_REPNZ;
        s->prefix &= ~PREFIX_REPZ;
        goto next_byte;
    case 0xf0:
        s->prefix |= PREFIX_LOCK;
        goto next_byte;
    /* Segment overrides: a later override replaces an earlier one. */
    case 0x2e:
        s->override = R_CS;
        goto next_byte;
    case 0x36:
        s->override = R_SS;
        goto next_byte;
    case 0x3e:
        s->override = R_DS;
        goto next_byte;
    case 0x26:
        s->override = R_ES;
        goto next_byte;
    case 0x64:
        s->override = R_FS;
        goto next_byte;
    case 0x65:
        s->override = R_GS;
        goto next_byte;
    case 0x66:
        s->prefix |= PREFIX_DATA;
        goto next_byte;
    case 0x67:
        s->prefix |= PREFIX_ADR;
        goto next_byte;
#ifdef TARGET_X86_64
    case 0x40 ... 0x4f:
        if (CODE64(s)) {
            /* REX prefix */
            s->prefix |= PREFIX_REX;
            /* W, R, X, B are bits 3..0; R/X/B are pre-shifted to bit 3. */
            s->vex_w = (b >> 3) & 1;
            s->rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            s->rex_b = (b & 0x1) << 3;
            goto next_byte;
        }
        /* Outside 64-bit code these bytes are ordinary opcodes. */
        break;
#endif
    case 0xc5: /* 2-byte VEX */
    case 0xc4: /* 3-byte VEX */
        /*
         * VEX prefixes cannot be used except in 32-bit mode.
         * Otherwise the instruction is LES or LDS.
         */
        if (CODE32(s) && !VM86(s)) {
            /* Legacy prefix equivalent to each value of the VEX.pp field. */
            static const int pp_prefix[4] = {
                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
            };
            int vex3, vex2 = x86_ldub_code(env, s);

            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                /*
                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
                 * otherwise the instruction is LES or LDS.
                 */
                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
                break;
            }

            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
                goto illegal_op;
            }
#ifdef TARGET_X86_64
            /* R is stored inverted in the VEX prefix, hence the '~'. */
            s->rex_r = (~vex2 >> 4) & 8;
#endif
            if (b == 0xc5) {
                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
                vex3 = vex2;
                decode_func = decode_0F;
            } else {
                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
                vex3 = x86_ldub_code(env, s);
#ifdef TARGET_X86_64
                /* X and B are stored inverted too. */
                s->rex_x = (~vex2 >> 3) & 8;
                s->rex_b = (~vex2 >> 2) & 8;
#endif
                s->vex_w = (vex3 >> 7) & 1;
                /* The mmmmm field selects the opcode map. */
                switch (vex2 & 0x1f) {
                case 0x01: /* Implied 0f leading opcode bytes.  */
                    decode_func = decode_0F;
                    break;
                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
                    decode_func = decode_0F38;
                    break;
                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
                    decode_func = decode_0F3A;
                    break;
                default:   /* Reserved for future use.  */
                    goto unknown_op;
                }
            }
            /* vvvv is stored inverted; L and pp are taken as they are. */
            s->vex_v = (~vex3 >> 3) & 0xf;
            s->vex_l = (vex3 >> 2) & 1;
            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
        }
        break;
    default:
        /* Values of 0x100 and above are handed to do_decode_0F. */
        if (b >= 0x100) {
            b -= 0x100;
            decode_func = do_decode_0F;
        }
        break;
    }

    /* Post-process prefixes.  */
    if (CODE64(s)) {
        /*
         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
         * over 0x66 if both are present.
         */
        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
    } else {
        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
            s->dflag = MO_32;
        } else {
            s->dflag = MO_16;
        }
        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
            s->aflag = MO_32;
        }  else {
            s->aflag = MO_16;
        }
    }

    /* Start from an all-zero decode state; only the opcode byte is known. */
    memset(&decode, 0, sizeof(decode));
    decode.b = b;
    if (!decode_insn(s, env, decode_func, &decode)) {
        goto illegal_op;
    }
    /* An entry without a generator function is an unknown opcode. */
    if (!decode.e.gen) {
        goto unknown_op;
    }

    if (!has_cpuid_feature(s, decode.e.cpuid)) {
        goto illegal_op;
    }

    /* Per-entry adjustments and mode restrictions. */
    switch (decode.e.special) {
    case X86_SPECIAL_None:
        break;

    case X86_SPECIAL_Locked:
        /* Force PREFIX_LOCK on when operand 0 has an effective address. */
        if (decode.op[0].has_ea) {
            s->prefix |= PREFIX_LOCK;
        }
        break;

    case X86_SPECIAL_ProtMode:
        /* Illegal when PE(s) is clear or in VM86 mode. */
        if (!PE(s) || VM86(s)) {
            goto illegal_op;
        }
        break;

    case X86_SPECIAL_i64:
        /* Illegal in 64-bit code. */
        if (CODE64(s)) {
            goto illegal_op;
        }
        break;
    case X86_SPECIAL_o64:
        /* Legal only in 64-bit code. */
        if (!CODE64(s)) {
            goto illegal_op;
        }
        break;

    case X86_SPECIAL_ZExtOp0:
        /* Operand 0 without an effective address is given size MO_32. */
        assert(decode.op[0].unit == X86_OP_INT);
        if (!decode.op[0].has_ea) {
            decode.op[0].ot = MO_32;
        }
        break;

    case X86_SPECIAL_ZExtOp2:
        /* Operand 2 without an effective address is given size MO_32. */
        assert(decode.op[2].unit == X86_OP_INT);
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = MO_32;
        }
        break;

    case X86_SPECIAL_AVXExtMov:
        /*
         * Operand 2 without an effective address uses the full vector
         * width selected by VEX.L; with one, its size is bumped by one
         * step when VEX.L is set.
         */
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
        } else if (s->vex_l) {
            decode.op[2].ot++;
        }
        break;

    default:
        break;
    }

    /* On failure validate_vex() has already generated the exception. */
    if (!validate_vex(s, &decode)) {
        return;
    }
    /* MMX-capable entry used without 66/F2/F3: call the enter_mmx helper. */
    if (decode.e.special == X86_SPECIAL_MMX &&
        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
        gen_helper_enter_mmx(tcg_env);
    }

    /*
     * Compute the effective address once if any operand uses it.  The last
     * argument is true only for vex_class 12 (presumably to request VSIB
     * handling -- confirm against gen_load_ea).
     */
    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
    }
    if (s->prefix & PREFIX_LOCK) {
        /* LOCK is accepted only when operand 0 is an integer with an EA. */
        if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
            goto illegal_op;
        }
        /*
         * Only operand 2 is loaded and there is no writeback here;
         * presumably the generator performs the locked access to operand 0
         * itself -- confirm against the gen_* functions.
         */
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
    } else {
        /* Vector destinations need their register offset computed first. */
        if (decode.op[0].unit == X86_OP_MMX) {
            compute_mmx_offset(&decode.op[0]);
        } else if (decode.op[0].unit == X86_OP_SSE) {
            compute_xmm_offset(&decode.op[0]);
        }
        /* Operand 1 goes to T0, operand 2 to T1; operand 0 comes from T0. */
        gen_load(s, &decode, 1, s->T0);
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
        gen_writeback(s, &decode, 0, s->T0);
    }
    return;
 illegal_op:
    gen_illegal_opcode(s);
    return;
 unknown_op:
    gen_unknown_opcode(env, s);
}
1855