xref: /openbmc/qemu/target/i386/tcg/decode-new.c.inc (revision db96605a49b334eeb2a5d1cc12981778f7d792aa)
1/*
2 * New-style decoder for i386 instructions
3 *
4 *  Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM.  As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
26 * size (X86_SIZE_*) codes used in the manual.
27 *
28 * The main difference is that the V, U and W types are extended to
29 * cover MMX as well; if an instruction is like
30 *
31 *      por   Pq, Qq
32 *  66  por   Vx, Hx, Wx
33 *
34 * only the second row is included and the instruction is marked as a
35 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
36 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
37 * "x" to "q" if there is no prefix.
38 *
39 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
40 * if the difference is expressed via prefixes.  Individual instructions
41 * are separated by prefix in the generator functions.
42 *
 * There are a couple cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
46 */
47
/* Zero initializer for a table slot; note the trailing comma is part of it.  */
#define X86_OP_NONE { 0 },

/*
 * Table entry that hands decoding over to the callback decode_<op>
 * (the name is built with glue()) and marks the entry with is_decode.
 * The three (type, size) pairs are given with the X86_TYPE_ / X86_SIZE_
 * prefixes omitted.  Any extra arguments are appended verbatim as further
 * designated initializers (cpuid(), vexN, p_xx, ...).
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

/*
 * Two-operand group: operand 1 gets the X86_TYPE_2op type with the size
 * of operand 0, and the second listed operand becomes operand 2.
 */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)

/* Group with no operands described in the table.  */
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
66
/*
 * Table entry bound directly to the generator gen_<op> (the name is built
 * with glue()).  The three (type, size) pairs are given with the
 * X86_TYPE_ / X86_SIZE_ prefixes omitted.  Any extra arguments are
 * appended verbatim as further designated initializers.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

/* Same as X86_OP_ENTRY3 plus a fourth operand of type I and size b.  */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/*
 * Two-operand entry: operand 1 gets the X86_TYPE_2op type with the size
 * of operand 0, and the second listed operand becomes operand 2.
 */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)

/* Only operand 0 is populated; operands 1 and 2 are None.  */
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)

/* Only operand 2 is populated; operands 0 and 1 are None.  */
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)

/* No operands at all.  */
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
91
/*
 * Attribute shorthands for the trailing arguments of the X86_OP_* macros.
 * Each one expands to a designated initializer *including* its trailing
 * comma, so they are written back to back without separators.
 */

/* CPUID feature the entry is tied to (X86_FEAT_ prefix omitted).  */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,

/* Values for the .special field; an entry can carry only one of them.  */
#define i64      .special = X86_SPECIAL_i64,
#define o64      .special = X86_SPECIAL_o64,
#define xchg     .special = X86_SPECIAL_Locked,
#define mmx      .special = X86_SPECIAL_MMX,
#define zext0    .special = X86_SPECIAL_ZExtOp0,
#define zext2    .special = X86_SPECIAL_ZExtOp2,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,

/* Values for the .vex_class field, optionally paired with .vex_special.  */
#define vex1      .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2      .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3      .vex_class = 3,
#define vex4      .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex5      .vex_class = 5,
#define vex6      .vex_class = 6,
#define vex7      .vex_class = 7,
#define vex8      .vex_class = 8,
#define vex11     .vex_class = 11,
#define vex12     .vex_class = 12,
#define vex13     .vex_class = 13,

/* Stand-alone .vex_special value.  */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
117
/*
 * Bits of the .valid_prefix mask.  P_00 (no prefix) is bit 0; the other
 * three are 1 shifted left by the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * .valid_prefix shorthands for table entries; the name lists the accepted
 * prefixes.  Each expands to a designated initializer with trailing comma.
 */
#define p_00          .valid_prefix = (P_00),
#define p_66          .valid_prefix = (P_66),
#define p_f3          .valid_prefix = (P_F3),
#define p_f2          .valid_prefix = (P_F2),
#define p_00_66       .valid_prefix = (P_00 | P_66),
#define p_00_f3       .valid_prefix = (P_00 | P_F3),
#define p_66_f2       .valid_prefix = (P_66 | P_F2),
#define p_00_66_f3    .valid_prefix = (P_00 | P_66 | P_F3),
#define p_66_f3_f2    .valid_prefix = (P_66 | P_F3 | P_F2),
#define p_00_66_f3_f2 .valid_prefix = (P_00 | P_66 | P_F3 | P_F2),
133
134static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
135{
136    if (!s->has_modrm) {
137        s->modrm = x86_ldub_code(env, s);
138        s->has_modrm = true;
139    }
140    return s->modrm;
141}
142
143static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
144{
145    if (s->prefix & PREFIX_REPNZ) {
146        return &entries[3];
147    } else if (s->prefix & PREFIX_REPZ) {
148        return &entries[2];
149    } else if (s->prefix & PREFIX_DATA) {
150        return &entries[1];
151    } else {
152        return &entries[0];
153    }
154}
155
156static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
157{
158    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
159    static const X86OpEntry group15_reg[8] = {
160    };
161
162    static const X86OpEntry group15_mem[8] = {
163        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5),
164        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5),
165    };
166
167    uint8_t modrm = get_modrm(s, env);
168    if ((modrm >> 6) == 3) {
169        *entry = group15_reg[(modrm >> 3) & 7];
170    } else {
171        *entry = group15_mem[(modrm >> 3) & 7];
172    }
173}
174
175static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
176{
177    static const X86GenFunc group17_gen[8] = {
178        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
179    };
180    int op = (get_modrm(s, env) >> 3) & 7;
181    entry->gen = group17_gen[op];
182}
183
184static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
185{
186    static const X86OpEntry opcodes_group12[8] = {
187        {},
188        {},
189        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
190        {},
191        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
192        {},
193        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
194        {},
195    };
196
197    int op = (get_modrm(s, env) >> 3) & 7;
198    *entry = opcodes_group12[op];
199}
200
201static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
202{
203    static const X86OpEntry opcodes_group13[8] = {
204        {},
205        {},
206        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
207        {},
208        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
209        {},
210        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
211        {},
212    };
213
214    int op = (get_modrm(s, env) >> 3) & 7;
215    *entry = opcodes_group13[op];
216}
217
218static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
219{
220    static const X86OpEntry opcodes_group14[8] = {
221        /* grp14 */
222        {},
223        {},
224        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
225        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
226        {},
227        {},
228        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
229        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
230    };
231
232    int op = (get_modrm(s, env) >> 3) & 7;
233    *entry = opcodes_group14[op];
234}
235
236static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
237{
238    static const X86OpEntry opcodes_0F6F[4] = {
239        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex1 mmx),  /* movq */
240        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
241        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
242        {},
243    };
244    *entry = *decode_by_prefix(s, opcodes_0F6F);
245}
246
247static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
248{
249    static const X86OpEntry pshufw[4] = {
250        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
251        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
252        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
253        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
254    };
255
256    *entry = *decode_by_prefix(s, pshufw);
257}
258
259static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
260{
261    if (!(s->prefix & PREFIX_VEX)) {
262        entry->gen = gen_EMMS;
263    } else if (!s->vex_l) {
264        entry->gen = gen_VZEROUPPER;
265        entry->vex_class = 8;
266    } else {
267        entry->gen = gen_VZEROALL;
268        entry->vex_class = 8;
269    }
270}
271
272static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
273{
274    static const X86OpEntry opcodes_0F78[4] = {
275        {},
276        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)),
277        {},
278        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)),
279    };
280    *entry = *decode_by_prefix(s, opcodes_0F78);
281}
282
283static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
284{
285    if (s->prefix & PREFIX_REPNZ) {
286        entry->gen = gen_INSERTQ_r;
287    } else if (s->prefix & PREFIX_DATA) {
288        entry->gen = gen_EXTRQ_r;
289    } else {
290        entry->gen = NULL;
291    };
292}
293
294static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
295{
296    static const X86OpEntry opcodes_0F7E[4] = {
297        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
298        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
299        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
300        {},
301    };
302    *entry = *decode_by_prefix(s, opcodes_0F7E);
303}
304
305static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
306{
307    static const X86OpEntry opcodes_0F7F[4] = {
308        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1 mmx), /* movq */
309        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
310        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
311        {},
312    };
313    *entry = *decode_by_prefix(s, opcodes_0F7F);
314}
315
316static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
317{
318    static const X86OpEntry movq[4] = {
319        {},
320        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
321        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
322        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
323    };
324
325    *entry = *decode_by_prefix(s, movq);
326}
327
328static const X86OpEntry opcodes_0F38_00toEF[240] = {
329    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
330    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
331    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
332    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
333    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
334    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
335    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
336    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
337
338    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
339    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
340    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
341    /* Listed incorrectly as type 4 */
342    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
343    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),
344
345    /*
346     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
347     * as 128-bit only in 2-17.
348     */
349    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
350    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
351    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
352    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
353    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
354    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
355
356    /* Same as PMOVSX.  */
357    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
358    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
359    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
360    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
361    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
362    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
363    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
364    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),
365
366    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
367    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
368    /* Listed incorrectly as type 4 */
369    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
370    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
371    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
372
373    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
374    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
375    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
376    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
377
378    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
379    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
380    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
381    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
382    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_00_66),
383    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_66),
384    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
385    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
386
387    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX) p_66), /* vbroadcastss */
388    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 cpuid(AVX) p_66), /* vbroadcastsd */
389    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX) p_66),
390    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
391    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
392    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
393
394    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
395    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
396    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
397    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
398    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
399    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
400    /* Incorrectly listed as Mx,Hx,Vx in the manual */
401    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
402    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
403
404    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
405    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
406    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
407    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
408    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
409    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
410    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
411    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
412
413    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX2) p_66),
414    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 cpuid(AVX2) p_66),
415    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX2) p_66),
416
417    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 cpuid(AVX2) p_66),
418    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 cpuid(AVX2) p_66),
419
420    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
421    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),
422
423    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
424    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
425    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
426    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
427    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
428};
429
430/* five rows for no prefix, 66, F3, F2, 66+F2  */
431static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
432    [0] = {
433        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
434        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
435        {},
436        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
437        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
438    },
439    [1] = {
440        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
441        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
442        {},
443        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
444        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
445    },
446    [2] = {
447        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
448        {},
449        {},
450        {},
451        {},
452    },
453    [3] = {
454        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
455        {},
456        {},
457        {},
458        {},
459    },
460    [5] = {
461        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
462        {},
463        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
464        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
465        {},
466    },
467    [6] = {
468        {},
469        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
470        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
471        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
472        {},
473    },
474    [7] = {
475        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
476        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
477        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
478        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
479        {},
480    },
481};
482
483static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
484{
485    *b = x86_ldub_code(env, s);
486    if (*b < 0xf0) {
487        *entry = opcodes_0F38_00toEF[*b];
488    } else {
489        int row = 0;
490        if (s->prefix & PREFIX_REPZ) {
491            /* The REPZ (F3) prefix has priority over 66 */
492            row = 2;
493        } else {
494            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
495            row += s->prefix & PREFIX_DATA ? 1 : 0;
496        }
497        *entry = opcodes_0F38_F0toFF[*b & 15][row];
498    }
499}
500
501static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
502{
503    static const X86OpEntry
504        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
505        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
506
507    int modrm = get_modrm(s, env);
508    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
509}
510
511static const X86OpEntry opcodes_0F3A[256] = {
512    /*
513     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
514     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
515     * only.
516     */
517    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66),
518    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66), /* VPERMPD */
519    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 cpuid(AVX2) p_66), /* VPBLENDD */
520    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
521    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
522    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),
523
524    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
525    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
526    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
527    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
528
529    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) zext2 p_66),
530    [0x21] = X86_OP_GROUP0(VINSERTPS),
531    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),
532
533    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
534    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
535    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
536    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
537    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),
538
539    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
540    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
541    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
542    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
543
544    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
545    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
546    /*
547     * Not listed as four operand in the manual.  Also writes and reads 128-bits
548     * from the first two operands due to the V operand picking higher entries of
549     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
550     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
551     * value of vex_special, because the table lists the operand types of VSQRTPx.
552     */
553    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
554    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
555    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
556    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
557    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
558    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
559
560    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),
561    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX) p_66),
562
563    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),
564    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX2) p_66),
565
566    /* Listed incorrectly as type 4 */
567    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
568    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
569    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66 avx2_256),
570
571    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),
572
573    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
574};
575
576static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
577{
578    *b = x86_ldub_code(env, s);
579    *entry = opcodes_0F3A[*b];
580}
581
582/*
583 * There are some mistakes in the operands in the manual, and the load/store/register
584 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
585 * efficiency of implementation rather than copying what the manual says.
586 *
587 * In particular:
588 *
589 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
590 * but this is not mentioned in the tables.
591 *
592 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
593 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
594 * quadword of the V operand.
595 */
596static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
597{
598    static const X86OpEntry opcodes_0F10_reg[4] = {
599        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
600        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
601        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex4),
602        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex4), /* MOVSD */
603    };
604
605    static const X86OpEntry opcodes_0F10_mem[4] = {
606        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
607        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
608        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex4),
609        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex4),
610    };
611
612    if ((get_modrm(s, env) >> 6) == 3) {
613        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
614    } else {
615        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
616    }
617}
618
619static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
620{
621    static const X86OpEntry opcodes_0F11_reg[4] = {
622        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVPS */
623        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVPD */
624        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex4),
625        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex4), /* MOVSD */
626    };
627
628    static const X86OpEntry opcodes_0F11_mem[4] = {
629        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVPS */
630        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVPD */
631        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4),
632        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4), /* MOVSD */
633    };
634
635    if ((get_modrm(s, env) >> 6) == 3) {
636        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
637    } else {
638        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
639    }
640}
641
642static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
643{
644    static const X86OpEntry opcodes_0F12_mem[4] = {
645        /*
646         * Use dq for operand for compatibility with gen_MOVSD and
647         * to allow VEX128 only.
648         */
649        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVLPS */
650        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVLPD */
651        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
652        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex4 cpuid(SSE3)), /* qq if VEX.256 */
653    };
654    static const X86OpEntry opcodes_0F12_reg[4] = {
655        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex4),
656        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex4), /* MOVLPD */
657        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
658        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
659    };
660
661    if ((get_modrm(s, env) >> 6) == 3) {
662        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
663    } else {
664        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
665        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
666            entry->s2 = X86_SIZE_qq;
667        }
668    }
669}
670
671static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
672{
673    static const X86OpEntry opcodes_0F16_mem[4] = {
674        /*
675         * Operand 1 technically only reads the low 64 bits, but uses dq so that
676         * it is easier to check for op0 == op1 in an endianness-neutral manner.
677         */
678        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVHPS */
679        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVHPD */
680        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
681        {},
682    };
683    static const X86OpEntry opcodes_0F16_reg[4] = {
684        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
685        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex4),
686        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex4), /* MOVHPD */
687        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
688        {},
689    };
690
691    if ((get_modrm(s, env) >> 6) == 3) {
692        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
693    } else {
694        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
695    }
696}
697
698static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
699{
700    static const X86OpEntry opcodes_0F2A[4] = {
701        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
702        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
703        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
704        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
705    };
706    *entry = *decode_by_prefix(s, opcodes_0F2A);
707}
708
709static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
710{
711    static const X86OpEntry opcodes_0F2B[4] = {
712        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPS */
713        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPD */
714        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
715        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
716    };
717
718    *entry = *decode_by_prefix(s, opcodes_0F2B);
719}
720
721static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
722{
723    static const X86OpEntry opcodes_0F2C[4] = {
724        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
725        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
726        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
727        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
728        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
729    };
730    *entry = *decode_by_prefix(s, opcodes_0F2C);
731}
732
733static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
734{
735    static const X86OpEntry opcodes_0F2D[4] = {
736        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
737        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
738        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
739        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
740        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
741    };
742    *entry = *decode_by_prefix(s, opcodes_0F2D);
743}
744
745static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
746{
747    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
748        entry->op1 = X86_TYPE_None;
749        entry->s1 = X86_SIZE_None;
750    }
751    switch (*b) {
752    case 0x51: entry->gen = gen_VSQRT; break;
753    case 0x52: entry->gen = gen_VRSQRT; break;
754    case 0x53: entry->gen = gen_VRCP; break;
755    case 0x5A: entry->gen = gen_VCVTfp2fp; break;
756    }
757}
758
759static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
760{
761    static const X86OpEntry opcodes_0F5B[4] = {
762        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
763        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
764        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
765        {},
766    };
767    *entry = *decode_by_prefix(s, opcodes_0F5B);
768}
769
770static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
771{
772    static const X86OpEntry opcodes_0FE6[4] = {
773        {},
774        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
775        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex2),
776        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
777    };
778    *entry = *decode_by_prefix(s, opcodes_0FE6);
779}
780
781static const X86OpEntry opcodes_0F[256] = {
782    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
783    /*
784     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
785     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
786     * function.
787     */
788    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
789
790    [0x10] = X86_OP_GROUP0(0F10),
791    [0x11] = X86_OP_GROUP0(0F11),
792    [0x12] = X86_OP_GROUP0(0F12),
793    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex4 p_00_66),
794    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
795    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
796    [0x16] = X86_OP_GROUP0(0F16),
797    /* Incorrectly listed as Mq,Vq in the manual */
798    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex4 p_00_66),
799
800    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
801    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
802    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex5 p_00_f3),
803    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex5 p_00_f3),
804    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
805    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
806    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
807    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
808
809    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
810    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
811    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
812    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
813    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
814    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
815    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
816    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
817
818    [0x70] = X86_OP_GROUP0(0F70),
819    [0x71] = X86_OP_GROUP0(group12),
820    [0x72] = X86_OP_GROUP0(group13),
821    [0x73] = X86_OP_GROUP0(group14),
822    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
823    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
824    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
825    [0x77] = X86_OP_GROUP0(0F77),
826
827    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
828    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
829    [0x2A] = X86_OP_GROUP0(0F2A),
830    [0x2B] = X86_OP_GROUP0(0F2B),
831    [0x2C] = X86_OP_GROUP0(0F2C),
832    [0x2D] = X86_OP_GROUP0(0F2D),
833    [0x2E] = X86_OP_ENTRY3(VUCOMI,     None,None, V,x, W,x,  vex4 p_00_66),
834    [0x2F] = X86_OP_ENTRY3(VCOMI,      None,None, V,x, W,x,  vex4 p_00_66),
835
836    [0x38] = X86_OP_GROUP0(0F38),
837    [0x3a] = X86_OP_GROUP0(0F3A),
838
839    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
840    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
841    [0x5a] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex3 p_00_66_f3_f2),
842    [0x5b] = X86_OP_GROUP0(0F5B),
843    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
844    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
845    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
846    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
847
848    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
849    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
850    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
851    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
852    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
853    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
854    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
855    [0x6f] = X86_OP_GROUP0(0F6F),
856
857    [0x78] = X86_OP_GROUP0(0F78),
858    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
859    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
860    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
861    [0x7e] = X86_OP_GROUP0(0F7E),
862    [0x7f] = X86_OP_GROUP0(0F7F),
863
864    [0xae] = X86_OP_GROUP0(group15),
865
866    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
867    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
868    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
869    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
870
871    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
872    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
873    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
874    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
875    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
876    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
877    [0xd6] = X86_OP_GROUP0(0FD6),
878    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
879
880    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
881    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
882    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
883    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
884    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
885    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
886    [0xe6] = X86_OP_GROUP0(0FE6),
887    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
888
889    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
890    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
891    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
892    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
893    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
894    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
895    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
896    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
897
898    /* Incorrectly missing from 2-17 */
899    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
900    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
901    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
902    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
903    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
904    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
905    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
906    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
907
908    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
909    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
910    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
911    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
912    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
913    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
914    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
915    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
916
917    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
918    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
919    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
920    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
921    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
922    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
923    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
924    /* 0xff = UD0 */
925};
926
927static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
928{
929    *entry = opcodes_0F[*b];
930}
931
932static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
933{
934    *b = x86_ldub_code(env, s);
935    do_decode_0F(s, env, entry, b);
936}
937
938static const X86OpEntry opcodes_root[256] = {
939    [0x0F] = X86_OP_GROUP0(0F),
940};
941
/* The shorthand macros used by the opcode tables are not needed past this point. */
#undef mmx
#undef vex1
#undef vex2
#undef vex3
#undef vex4
#undef vex4_unal
#undef vex5
#undef vex6
#undef vex7
#undef vex8
#undef vex11
#undef vex12
#undef vex13
955
956/*
957 * Decode the fixed part of the opcode and place the last
958 * in b.
959 */
960static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
961{
962    *entry = opcodes_root[*b];
963}
964
965
966static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
967                        X86DecodedOp *op, X86OpType type)
968{
969    int modrm = get_modrm(s, env);
970    if ((modrm >> 6) == 3) {
971        if (s->prefix & PREFIX_LOCK) {
972            decode->e.gen = gen_illegal;
973            return 0xff;
974        }
975        op->n = (modrm & 7);
976        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
977            op->n |= REX_B(s);
978        }
979    } else {
980        op->has_ea = true;
981        op->n = -1;
982        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
983    }
984    return modrm;
985}
986
987static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
988{
989    switch (size) {
990    case X86_SIZE_b:  /* byte */
991        *ot = MO_8;
992        return true;
993
994    case X86_SIZE_d:  /* 32-bit */
995    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
996        *ot = MO_32;
997        return true;
998
999    case X86_SIZE_p:  /* Far pointer, return offset size */
1000    case X86_SIZE_s:  /* Descriptor, return offset size */
1001    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1002        *ot = s->dflag;
1003        return true;
1004
1005    case X86_SIZE_pi: /* MMX */
1006    case X86_SIZE_q:  /* 64-bit */
1007    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1008        *ot = MO_64;
1009        return true;
1010
1011    case X86_SIZE_w:  /* 16-bit */
1012        *ot = MO_16;
1013        return true;
1014
1015    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1016        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1017        return true;
1018
1019    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1020        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1021        return true;
1022
1023    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1024        if (e->special == X86_SPECIAL_MMX &&
1025            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1026            *ot = MO_64;
1027            return true;
1028        }
1029        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1030            return false;
1031        }
1032        *ot = MO_128;
1033        return true;
1034
1035    case X86_SIZE_qq: /* AVX 256-bit */
1036        if (!s->vex_l) {
1037            return false;
1038        }
1039        *ot = MO_256;
1040        return true;
1041
1042    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1043        if (e->special == X86_SPECIAL_MMX &&
1044            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1045            *ot = MO_64;
1046            return true;
1047        }
1048        /* fall through */
1049    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1050    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1051        *ot = s->vex_l ? MO_256 : MO_128;
1052        return true;
1053
1054    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1055        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1056        return true;
1057
1058    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1059        *ot = CODE64(s) ? MO_64 : s->dflag;
1060        return true;
1061
1062    default:
1063        *ot = -1;
1064        return true;
1065    }
1066}
1067
1068static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1069                      X86DecodedOp *op, X86OpType type, int b)
1070{
1071    int modrm;
1072
1073    switch (type) {
1074    case X86_TYPE_None:  /* Implicit or absent */
1075    case X86_TYPE_A:  /* Implicit */
1076    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1077        break;
1078
1079    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1080        op->unit = X86_OP_INT;
1081        op->n = s->vex_v;
1082        break;
1083
1084    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1085        op->unit = X86_OP_CR;
1086        goto get_reg;
1087
1088    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1089        op->unit = X86_OP_DR;
1090        goto get_reg;
1091
1092    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1093        op->unit = X86_OP_INT;
1094        goto get_reg;
1095
1096    case X86_TYPE_S:  /* reg selects a segment register */
1097        op->unit = X86_OP_SEG;
1098        goto get_reg;
1099
1100    case X86_TYPE_P:
1101        op->unit = X86_OP_MMX;
1102        goto get_reg;
1103
1104    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1105        if (decode->e.special == X86_SPECIAL_MMX &&
1106            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1107            op->unit = X86_OP_MMX;
1108        } else {
1109            op->unit = X86_OP_SSE;
1110        }
1111    get_reg:
1112        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1113        break;
1114
1115    case X86_TYPE_E:  /* ALU modrm operand */
1116        op->unit = X86_OP_INT;
1117        goto get_modrm;
1118
1119    case X86_TYPE_Q:  /* MMX modrm operand */
1120        op->unit = X86_OP_MMX;
1121        goto get_modrm;
1122
1123    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1124        if (decode->e.special == X86_SPECIAL_MMX &&
1125            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1126            op->unit = X86_OP_MMX;
1127        } else {
1128            op->unit = X86_OP_SSE;
1129        }
1130        goto get_modrm;
1131
1132    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1133        op->unit = X86_OP_MMX;
1134        goto get_modrm_reg;
1135
1136    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1137        if (decode->e.special == X86_SPECIAL_MMX &&
1138            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1139            op->unit = X86_OP_MMX;
1140        } else {
1141            op->unit = X86_OP_SSE;
1142        }
1143        goto get_modrm_reg;
1144
1145    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1146        op->unit = X86_OP_INT;
1147    get_modrm_reg:
1148        modrm = get_modrm(s, env);
1149        if ((modrm >> 6) != 3) {
1150            return false;
1151        }
1152        goto get_modrm;
1153
1154    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1155        op->unit = X86_OP_SSE;
1156        /* fall through */
1157    case X86_TYPE_M:  /* modrm byte selects a memory operand */
1158        modrm = get_modrm(s, env);
1159        if ((modrm >> 6) == 3) {
1160            return false;
1161        }
1162    get_modrm:
1163        decode_modrm(s, env, decode, op, type);
1164        break;
1165
1166    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1167        op->unit = X86_OP_INT;
1168        op->has_ea = true;
1169        op->n = -1;
1170        decode->mem = (AddressParts) {
1171            .def_seg = R_DS,
1172            .base = -1,
1173            .index = -1,
1174            .disp = insn_get_addr(env, s, s->aflag)
1175        };
1176        break;
1177
1178    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1179        if ((s->prefix & PREFIX_VEX)) {
1180            op->unit = X86_OP_SSE;
1181            op->n = s->vex_v;
1182            break;
1183        }
1184        if (op == &decode->op[0]) {
1185            /* shifts place the destination in VEX.vvvv, use modrm */
1186            return decode_op(s, env, decode, op, decode->e.op1, b);
1187        } else {
1188            return decode_op(s, env, decode, op, decode->e.op0, b);
1189        }
1190
1191    case X86_TYPE_I:  /* Immediate */
1192        op->unit = X86_OP_IMM;
1193        decode->immediate = insn_get_signed(env, s, op->ot);
1194        break;
1195
1196    case X86_TYPE_J:  /* Relative offset for a jump */
1197        op->unit = X86_OP_IMM;
1198        decode->immediate = insn_get_signed(env, s, op->ot);
1199        decode->immediate += s->pc - s->cs_base;
1200        if (s->dflag == MO_16) {
1201            decode->immediate &= 0xffff;
1202        } else if (!CODE64(s)) {
1203            decode->immediate &= 0xffffffffu;
1204        }
1205        break;
1206
1207    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1208        op->n = insn_get(env, s, op->ot) >> 4;
1209        break;
1210
1211    case X86_TYPE_X:  /* string source */
1212        op->n = -1;
1213        decode->mem = (AddressParts) {
1214            .def_seg = R_DS,
1215            .base = R_ESI,
1216            .index = -1,
1217        };
1218        break;
1219
1220    case X86_TYPE_Y:  /* string destination */
1221        op->n = -1;
1222        decode->mem = (AddressParts) {
1223            .def_seg = R_ES,
1224            .base = R_EDI,
1225            .index = -1,
1226        };
1227        break;
1228
1229    case X86_TYPE_2op:
1230        *op = decode->op[0];
1231        break;
1232
1233    case X86_TYPE_LoBits:
1234        op->n = (b & 7) | REX_B(s);
1235        op->unit = X86_OP_INT;
1236        break;
1237
1238    case X86_TYPE_0 ... X86_TYPE_7:
1239        op->n = type - X86_TYPE_0;
1240        op->unit = X86_OP_INT;
1241        break;
1242
1243    case X86_TYPE_ES ... X86_TYPE_GS:
1244        op->n = type - X86_TYPE_ES;
1245        op->unit = X86_OP_SEG;
1246        break;
1247    }
1248
1249    return true;
1250}
1251
1252static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1253{
1254    uint16_t sse_prefixes;
1255
1256    if (!e->valid_prefix) {
1257        return true;
1258    }
1259    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1260        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1261        s->prefix &= ~PREFIX_DATA;
1262    }
1263
1264    /* Now, either zero or one bit is set in sse_prefixes.  */
1265    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1266    return e->valid_prefix & (1 << sse_prefixes);
1267}
1268
1269static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1270                        X86DecodedInsn *decode)
1271{
1272    X86OpEntry *e = &decode->e;
1273
1274    decode_func(s, env, e, &decode->b);
1275    while (e->is_decode) {
1276        e->is_decode = false;
1277        e->decode(s, env, e, &decode->b);
1278    }
1279
1280    if (!validate_sse_prefix(s, e)) {
1281        return false;
1282    }
1283
1284    /* First compute size of operands in order to initialize s->rip_offset.  */
1285    if (e->op0 != X86_TYPE_None) {
1286        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1287            return false;
1288        }
1289        if (e->op0 == X86_TYPE_I) {
1290            s->rip_offset += 1 << decode->op[0].ot;
1291        }
1292    }
1293    if (e->op1 != X86_TYPE_None) {
1294        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1295            return false;
1296        }
1297        if (e->op1 == X86_TYPE_I) {
1298            s->rip_offset += 1 << decode->op[1].ot;
1299        }
1300    }
1301    if (e->op2 != X86_TYPE_None) {
1302        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1303            return false;
1304        }
1305        if (e->op2 == X86_TYPE_I) {
1306            s->rip_offset += 1 << decode->op[2].ot;
1307        }
1308    }
1309    if (e->op3 != X86_TYPE_None) {
1310        /*
1311         * A couple instructions actually use the extra immediate byte for an Lx
1312         * register operand; those are handled in the gen_* functions as one off.
1313         */
1314        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1315        s->rip_offset += 1;
1316    }
1317
1318    if (e->op0 != X86_TYPE_None &&
1319        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1320        return false;
1321    }
1322
1323    if (e->op1 != X86_TYPE_None &&
1324        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1325        return false;
1326    }
1327
1328    if (e->op2 != X86_TYPE_None &&
1329        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
1330        return false;
1331    }
1332
1333    if (e->op3 != X86_TYPE_None) {
1334        decode->immediate = insn_get_signed(env, s, MO_8);
1335    }
1336
1337    return true;
1338}
1339
1340static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
1341{
1342    switch (cpuid) {
1343    case X86_FEAT_None:
1344        return true;
1345    case X86_FEAT_MOVBE:
1346        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
1347    case X86_FEAT_PCLMULQDQ:
1348        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
1349    case X86_FEAT_SSE:
1350        return (s->cpuid_ext_features & CPUID_SSE);
1351    case X86_FEAT_SSE2:
1352        return (s->cpuid_ext_features & CPUID_SSE2);
1353    case X86_FEAT_SSE3:
1354        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
1355    case X86_FEAT_SSSE3:
1356        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
1357    case X86_FEAT_SSE41:
1358        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
1359    case X86_FEAT_SSE42:
1360        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
1361    case X86_FEAT_AES:
1362        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
1363            return false;
1364        } else if (!(s->prefix & PREFIX_VEX)) {
1365            return true;
1366        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
1367            return false;
1368        } else {
1369            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
1370        }
1371
1372    case X86_FEAT_AVX:
1373        return (s->cpuid_ext_features & CPUID_EXT_AVX);
1374
1375    case X86_FEAT_3DNOW:
1376        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
1377    case X86_FEAT_SSE4A:
1378        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
1379
1380    case X86_FEAT_ADX:
1381        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
1382    case X86_FEAT_BMI1:
1383        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
1384    case X86_FEAT_BMI2:
1385        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
1386    case X86_FEAT_AVX2:
1387        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
1388    }
1389    g_assert_not_reached();
1390}
1391
1392static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
1393{
1394    X86OpEntry *e = &decode->e;
1395
1396    switch (e->vex_special) {
1397    case X86_VEX_REPScalar:
1398        /*
1399         * Instructions which differ between 00/66 and F2/F3 in the
1400         * exception classification and the size of the memory operand.
1401         */
1402        assert(e->vex_class == 1 || e->vex_class == 2);
1403        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1404            e->vex_class = 3;
1405            if (s->vex_l) {
1406                goto illegal;
1407            }
1408            assert(decode->e.s2 == X86_SIZE_x);
1409            if (decode->op[2].has_ea) {
1410                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
1411            }
1412        }
1413        break;
1414
1415    case X86_VEX_SSEUnaligned:
1416        /* handled in sse_needs_alignment.  */
1417        break;
1418
1419    case X86_VEX_AVX2_256:
1420        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
1421            goto illegal;
1422        }
1423    }
1424
1425    /* TODO: instructions that require VEX.W=0 (Table 2-16) */
1426
1427    switch (e->vex_class) {
1428    case 0:
1429        if (s->prefix & PREFIX_VEX) {
1430            goto illegal;
1431        }
1432        return true;
1433    case 1:
1434    case 2:
1435    case 3:
1436    case 4:
1437    case 5:
1438    case 7:
1439        if (s->prefix & PREFIX_VEX) {
1440            if (!(s->flags & HF_AVX_EN_MASK)) {
1441                goto illegal;
1442            }
1443        } else {
1444            if (!(s->flags & HF_OSFXSR_MASK)) {
1445                goto illegal;
1446            }
1447        }
1448        break;
1449    case 12:
1450        /* Must have a VSIB byte and no address prefix.  */
1451        assert(s->has_modrm);
1452        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
1453            goto illegal;
1454        }
1455
1456        /* Check no overlap between registers.  */
1457        if (!decode->op[0].has_ea &&
1458            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
1459            goto illegal;
1460        }
1461        assert(!decode->op[1].has_ea);
1462        if (decode->op[1].n == decode->mem.index) {
1463            goto illegal;
1464        }
1465        if (!decode->op[2].has_ea &&
1466            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
1467            goto illegal;
1468        }
1469        /* fall through */
1470    case 6:
1471    case 11:
1472        if (!(s->prefix & PREFIX_VEX)) {
1473            goto illegal;
1474        }
1475        if (!(s->flags & HF_AVX_EN_MASK)) {
1476            goto illegal;
1477        }
1478        break;
1479    case 8:
1480        /* Non-VEX case handled in decode_0F77.  */
1481        assert(s->prefix & PREFIX_VEX);
1482        if (!(s->flags & HF_AVX_EN_MASK)) {
1483            goto illegal;
1484        }
1485        break;
1486    case 13:
1487        if (!(s->prefix & PREFIX_VEX)) {
1488            goto illegal;
1489        }
1490        if (s->vex_l) {
1491            goto illegal;
1492        }
1493        /* All integer instructions use VEX.vvvv, so exit.  */
1494        return true;
1495    }
1496
1497    if (s->vex_v != 0 &&
1498        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
1499        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
1500        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
1501        goto illegal;
1502    }
1503
1504    if (s->flags & HF_TS_MASK) {
1505        goto nm_exception;
1506    }
1507    if (s->flags & HF_EM_MASK) {
1508        goto illegal;
1509    }
1510    return true;
1511
1512nm_exception:
1513    gen_NM_exception(s);
1514    return false;
1515illegal:
1516    gen_illegal_opcode(s);
1517    return false;
1518}
1519
1520static void decode_temp_free(X86DecodedOp *op)
1521{
1522    if (op->v_ptr) {
1523        tcg_temp_free_ptr(op->v_ptr);
1524    }
1525}
1526
1527static void decode_temps_free(X86DecodedInsn *decode)
1528{
1529    decode_temp_free(&decode->op[0]);
1530    decode_temp_free(&decode->op[1]);
1531    decode_temp_free(&decode->op[2]);
1532}
1533
1534/*
1535 * Convert one instruction. s->base.is_jmp is set if the translation must
1536 * be stopped.
1537 */
1538static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
1539{
1540    CPUX86State *env = cpu->env_ptr;
1541    bool first = true;
1542    X86DecodedInsn decode;
1543    X86DecodeFunc decode_func = decode_root;
1544
1545    s->has_modrm = false;
1546
1547 next_byte:
1548    if (first) {
1549        first = false;
1550    } else {
1551        b = x86_ldub_code(env, s);
1552    }
1553    /* Collect prefixes.  */
1554    switch (b) {
1555    case 0xf3:
1556        s->prefix |= PREFIX_REPZ;
1557        s->prefix &= ~PREFIX_REPNZ;
1558        goto next_byte;
1559    case 0xf2:
1560        s->prefix |= PREFIX_REPNZ;
1561        s->prefix &= ~PREFIX_REPZ;
1562        goto next_byte;
1563    case 0xf0:
1564        s->prefix |= PREFIX_LOCK;
1565        goto next_byte;
1566    case 0x2e:
1567        s->override = R_CS;
1568        goto next_byte;
1569    case 0x36:
1570        s->override = R_SS;
1571        goto next_byte;
1572    case 0x3e:
1573        s->override = R_DS;
1574        goto next_byte;
1575    case 0x26:
1576        s->override = R_ES;
1577        goto next_byte;
1578    case 0x64:
1579        s->override = R_FS;
1580        goto next_byte;
1581    case 0x65:
1582        s->override = R_GS;
1583        goto next_byte;
1584    case 0x66:
1585        s->prefix |= PREFIX_DATA;
1586        goto next_byte;
1587    case 0x67:
1588        s->prefix |= PREFIX_ADR;
1589        goto next_byte;
1590#ifdef TARGET_X86_64
1591    case 0x40 ... 0x4f:
1592        if (CODE64(s)) {
1593            /* REX prefix */
1594            s->prefix |= PREFIX_REX;
1595            s->vex_w = (b >> 3) & 1;
1596            s->rex_r = (b & 0x4) << 1;
1597            s->rex_x = (b & 0x2) << 2;
1598            s->rex_b = (b & 0x1) << 3;
1599            goto next_byte;
1600        }
1601        break;
1602#endif
1603    case 0xc5: /* 2-byte VEX */
1604    case 0xc4: /* 3-byte VEX */
1605        /*
1606         * VEX prefixes cannot be used except in 32-bit mode.
1607         * Otherwise the instruction is LES or LDS.
1608         */
1609        if (CODE32(s) && !VM86(s)) {
1610            static const int pp_prefix[4] = {
1611                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
1612            };
1613            int vex3, vex2 = x86_ldub_code(env, s);
1614
1615            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
1616                /*
1617                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
1618                 * otherwise the instruction is LES or LDS.
1619                 */
1620                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
1621                break;
1622            }
1623
1624            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
1625            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
1626                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
1627                goto illegal_op;
1628            }
1629#ifdef TARGET_X86_64
1630            s->rex_r = (~vex2 >> 4) & 8;
1631#endif
1632            if (b == 0xc5) {
1633                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
1634                vex3 = vex2;
1635                decode_func = decode_0F;
1636            } else {
1637                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
1638                vex3 = x86_ldub_code(env, s);
1639#ifdef TARGET_X86_64
1640                s->rex_x = (~vex2 >> 3) & 8;
1641                s->rex_b = (~vex2 >> 2) & 8;
1642#endif
1643                s->vex_w = (vex3 >> 7) & 1;
1644                switch (vex2 & 0x1f) {
1645                case 0x01: /* Implied 0f leading opcode bytes.  */
1646                    decode_func = decode_0F;
1647                    break;
1648                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
1649                    decode_func = decode_0F38;
1650                    break;
1651                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
1652                    decode_func = decode_0F3A;
1653                    break;
1654                default:   /* Reserved for future use.  */
1655                    goto unknown_op;
1656                }
1657            }
1658            s->vex_v = (~vex3 >> 3) & 0xf;
1659            s->vex_l = (vex3 >> 2) & 1;
1660            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
1661        }
1662        break;
1663    default:
1664        if (b >= 0x100) {
1665            b -= 0x100;
1666            decode_func = do_decode_0F;
1667        }
1668        break;
1669    }
1670
1671    /* Post-process prefixes.  */
1672    if (CODE64(s)) {
1673        /*
1674         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
1675         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
1676         * over 0x66 if both are present.
1677         */
1678        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
1679        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
1680        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
1681    } else {
1682        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
1683        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
1684            s->dflag = MO_32;
1685        } else {
1686            s->dflag = MO_16;
1687        }
1688        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
1689        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
1690            s->aflag = MO_32;
1691        }  else {
1692            s->aflag = MO_16;
1693        }
1694    }
1695
1696    memset(&decode, 0, sizeof(decode));
1697    decode.b = b;
1698    if (!decode_insn(s, env, decode_func, &decode)) {
1699        goto illegal_op;
1700    }
1701    if (!decode.e.gen) {
1702        goto unknown_op;
1703    }
1704
1705    if (!has_cpuid_feature(s, decode.e.cpuid)) {
1706        goto illegal_op;
1707    }
1708
1709    switch (decode.e.special) {
1710    case X86_SPECIAL_None:
1711        break;
1712
1713    case X86_SPECIAL_Locked:
1714        if (decode.op[0].has_ea) {
1715            s->prefix |= PREFIX_LOCK;
1716        }
1717        break;
1718
1719    case X86_SPECIAL_ProtMode:
1720        if (!PE(s) || VM86(s)) {
1721            goto illegal_op;
1722        }
1723        break;
1724
1725    case X86_SPECIAL_i64:
1726        if (CODE64(s)) {
1727            goto illegal_op;
1728        }
1729        break;
1730    case X86_SPECIAL_o64:
1731        if (!CODE64(s)) {
1732            goto illegal_op;
1733        }
1734        break;
1735
1736    case X86_SPECIAL_ZExtOp0:
1737        assert(decode.op[0].unit == X86_OP_INT);
1738        if (!decode.op[0].has_ea) {
1739            decode.op[0].ot = MO_32;
1740        }
1741        break;
1742
1743    case X86_SPECIAL_ZExtOp2:
1744        assert(decode.op[2].unit == X86_OP_INT);
1745        if (!decode.op[2].has_ea) {
1746            decode.op[2].ot = MO_32;
1747        }
1748        break;
1749
1750    case X86_SPECIAL_AVXExtMov:
1751        if (!decode.op[2].has_ea) {
1752            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
1753        } else if (s->vex_l) {
1754            decode.op[2].ot++;
1755        }
1756        break;
1757
1758    case X86_SPECIAL_MMX:
1759        if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
1760            gen_helper_enter_mmx(cpu_env);
1761        }
1762        break;
1763    }
1764
1765    if (!validate_vex(s, &decode)) {
1766        return;
1767    }
1768    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
1769        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
1770    }
1771    if (s->prefix & PREFIX_LOCK) {
1772        if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
1773            goto illegal_op;
1774        }
1775        gen_load(s, &decode, 2, s->T1);
1776        decode.e.gen(s, env, &decode);
1777    } else {
1778        if (decode.op[0].unit == X86_OP_MMX) {
1779            compute_mmx_offset(&decode.op[0]);
1780        } else if (decode.op[0].unit == X86_OP_SSE) {
1781            compute_xmm_offset(&decode.op[0]);
1782        }
1783        gen_load(s, &decode, 1, s->T0);
1784        gen_load(s, &decode, 2, s->T1);
1785        decode.e.gen(s, env, &decode);
1786        gen_writeback(s, &decode, 0, s->T0);
1787    }
1788    decode_temps_free(&decode);
1789    return;
1790 illegal_op:
1791    gen_illegal_opcode(s);
1792    return;
1793 unknown_op:
1794    gen_unknown_opcode(env, s);
1795}
1796