/*
 * New-style decoder for i386 instructions
 *
 *  Copyright (c) 2022 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The decoder is mostly based on tables copied from the Intel SDM.  As
 * a result, most operand load and writeback is done entirely in common
 * table-driven code using the same operand type (X86_TYPE_*) and
 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
 * though.
 *
 * Operand sizes
 * -------------
 *
 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
 *
 * The manual lists immediate far destinations as Ap (technically an implicit
 * argument).  The decoder splits them into two immediates, using "Ip" for
 * the offset part (that comes first in the instruction stream) and "Iw" for
 * the segment/selector part.  The size of the offset is given by s->dflag
 * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
 * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
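 *
 * For example, the Jcc entries in the 0F table below use "J,z_f64": the
 * displacement itself stays "z"-sized, while the f64 half of the size code
 * records that 16-bit and 32-bit operand sizes cannot be encoded in 64-bit
 * mode, per the definition above.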
 *
 * Operand types
 * -------------
 *
 * For memory-only operands, if the emitter function wants to rely on
 * generic load and writeback, the decoder needs to know the type of the
 * operand.  Therefore, M is often replaced by the more specific EM and WM
 * (respectively selecting an ALU operand, like the operand type E, or a
 * vector operand like the operand type W).
 *
 * Immediates are almost always signed or masked away in helpers.  Two
 * common exceptions are IN/OUT and absolute jumps.  For these, there is
 * an additional custom operand type "I_unsigned".  Alternatively, the
 * mask could be applied (and the original sign-extended value would be
 * optimized away by TCG) in the emitter function.
 *
 * Finally, a "nop" operand type is used for multi-byte NOPs.  It accepts
 * any value of mod, including 11b (which M does not), but it does not
 * try to interpret the operand as M would.
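 *
 * Compare, in the 0F table below, the 3DNow! prefetch at 0F 0D, which uses
 * "M,v" and therefore requires a memory operand, with the reserved NOPs at
 * 0F 18/19/1C-1F, which use "nop,v" and accept register encodings as well.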
 *
 * Vector operands
 * ---------------
 *
 * The main difference is that the V, U and W types are extended to
 * cover MMX as well; if an instruction is like
 *
 *      por   Pq, Qq
 *  66  por   Vx, Hx, Wx
 *
 * only the second row is included and the instruction is marked as a
 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
 * "x" to "q" if there is no prefix.
 *
 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
 * if the difference is expressed via prefixes.  Individual instructions
 * are separated by prefix in the generator functions.
 *
 * There is a custom size "xh" used to address half of a SSE/AVX operand.
 * This points to a 64-bit operand for SSE operations, 128-bit operand
 * for 256-bit AVX operands, etc.  It is used for conversion operations
 * such as VCVTPH2PS or VCVTSS2SD.
 *
 * There are a couple cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
 *
 * VEX exception classes
 * ---------------------
 *
 * Speaking about imprecisions in the manual, the decoder treats all
 * exception-class 4 instructions as having an optional VEX prefix, and
 * all exception-class 6 instructions as having a mandatory VEX prefix.
 * This is true except for a dozen instructions; these are in exception
 * class 4 but do not ignore the VEX.W bit (which does not even exist
 * without a VEX prefix).  These instructions are mostly listed in Intel's
 * table 2-16, but with a few exceptions.
 *
 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
 * it lists the VEX.W requirements in the exception classes as well (except
 * when it doesn't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still uses 4 as the
 * primary exception class... except when it doesn't.
 *
 * The summary is:
 *                       Intel     AMD         VEX.W           note
 * -------------------------------------------------------------------
 * vpblendd              4         4J          0
 * vpblendvb             4         4E-X        0               (*)
 * vpbroadcastq          6         6D          0               (+)
 * vpermd/vpermps        4         4H          0               (§)
 * vpermq/vpermpd        4         4H-1        1               (§)
 * vpermilpd/vpermilps   4         6E          0               (^)
 * vpmaskmovd            6         4K          significant     (^)
 * vpsllv                4         4K          significant
 * vpsrav                4         4J          0
 * vpsrlv                4         4K          significant
 * vtestps/vtestpd       4         4G          0
 *
 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
 *         explain why it is considered exception class 4.  However,
 *         Intel says that VEX-only instructions should be in class 6...
 *
 *    (+)  Not found in Intel's table 2-16
 *
 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
 *         however present in the description of the instruction
 *
 *    (^)  these are the two cases in which Intel and AMD disagree on the
 *         primary exception class
 */
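
/*
 * In the tables below this information surfaces as vex_class and chk()
 * flags: for example VPERMQ, class 4H-1 in the summary above with a
 * VEX.W=1 requirement, is declared with "vex6 chk(W1)" rather than the
 * class 4 that the Intel manual suggests.
 */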

#define X86_OP_NONE { 0 },

#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

#define X86_OP_GROUP1(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...)                           \
    X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/*
 * Short forms that are mostly useful for ALU opcodes and other
 * one-byte opcodes.  For vector instructions it is usually
 * clearer to write all three operands explicitly, because the
 * corresponding gen_* function will use OP_PTRn rather than s->T0
 * and s->T1.
 */
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
    X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
#define X86_OP_ENTRY1(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)

#define cpuid(feat) .cpuid = X86_FEAT_##feat,
#define noseg .special = X86_SPECIAL_NoSeg,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define op0_Mw .special = X86_SPECIAL_Op0_Mw,

#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

#define avx2_256 .vex_special = X86_VEX_AVX2_256,

#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
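
/*
 * Putting the macros together, a sketch of what one entry expands to
 * (trailing commas elided): X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66)
 * becomes roughly
 *
 *   { .gen = gen_PAND,
 *     .op0 = X86_TYPE_V, .s0 = X86_SIZE_x,
 *     .op1 = X86_TYPE_H, .s1 = X86_SIZE_x,
 *     .op2 = X86_TYPE_W, .s2 = X86_SIZE_x,
 *     .vex_class = 4, .valid_prefix = P_00 | P_66 }
 */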

#define UNKNOWN_OPCODE ((X86OpEntry) {})

static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
{
    if (!s->has_modrm) {
        s->modrm = x86_ldub_code(env, s);
        s->has_modrm = true;
    }
    return s->modrm;
}

static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
{
    if (s->prefix & PREFIX_REPNZ) {
        return &entries[3];
    } else if (s->prefix & PREFIX_REPZ) {
        return &entries[2];
    } else if (s->prefix & PREFIX_DATA) {
        return &entries[1];
    } else {
        return &entries[0];
    }
}
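
/*
 * Note the priority order above: when more than one mandatory prefix is
 * present, F2 wins over F3, which wins over 66.  For instance an
 * instruction carrying both 66 and F2 selects entries[3], not entries[1].
 */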

static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
    static const X86OpEntry group15_reg[8] = {
    };

    static const X86OpEntry group15_mem[8] = {
        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128)),
        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128)),
    };

    uint8_t modrm = get_modrm(s, env);
    if ((modrm >> 6) == 3) {
        *entry = group15_reg[(modrm >> 3) & 7];
    } else {
        *entry = group15_mem[(modrm >> 3) & 7];
    }
}
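
/*
 * As in all opcode groups, the extension lives in bits 3-5 of modrm.
 * For example, modrm 0x10 (mod=00b, reg=2, rm=0) selects the memory form
 * of LDMXCSR, while modrm 0xd0 (mod=11b, reg=2) selects group15_reg[2],
 * an empty entry and therefore the same as UNKNOWN_OPCODE.
 */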

static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86GenFunc group17_gen[8] = {
        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
    };
    int op = (get_modrm(s, env) >> 3) & 7;
    entry->gen = group17_gen[op];
}

static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group12[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group12[op];
}

static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group13[8] = {
        {},
        {},
        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        {},
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group13[op];
}

static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_group14[8] = {
        /* grp14 */
        {},
        {},
        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
        {},
        {},
        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
    };

    int op = (get_modrm(s, env) >> 3) & 7;
    *entry = opcodes_group14[op];
}

static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F6F[4] = {
        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F6F);
}

static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry pshufw[4] = {
        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
    };

    *entry = *decode_by_prefix(s, pshufw);
}

static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & PREFIX_VEX)) {
        entry->gen = gen_EMMS;
    } else if (!s->vex_l) {
        entry->gen = gen_VZEROUPPER;
        entry->vex_class = 8;
    } else {
        entry->gen = gen_VZEROALL;
        entry->vex_class = 8;
    }
}

static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F78[4] = {
        {},
        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
        {},
        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
    };
    *entry = *decode_by_prefix(s, opcodes_0F78);
}

static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (s->prefix & PREFIX_REPNZ) {
        entry->gen = gen_INSERTQ_r; /* AMD extension */
    } else if (s->prefix & PREFIX_DATA) {
        entry->gen = gen_EXTRQ_r; /* AMD extension */
    } else {
        entry->gen = NULL;
    }
}

static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7E[4] = {
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7E);
}

static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F7F[4] = {
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F7F);
}

static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry movq[4] = {
        {},
        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
    };

    *entry = *decode_by_prefix(s, movq);
}

static const X86OpEntry opcodes_0F38_00toEF[240] = {
    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),

    /*
     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
     * as 128-bit only in table 2-17.
     */
    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),

    /* Same as PMOVSX.  */
    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),

    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
    /* Listed incorrectly as type 4 */
    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),

    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */

    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    /* Listed incorrectly as type 4 */
    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
    /* Incorrectly listed as Mx,Hx,Vx in the manual */
    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),

    /* VPBROADCASTQ not listed as W0 in table 2-16 */
    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),

    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),

    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),

    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),

    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),

    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),

    /*
     * REG selects srcdest2 operand, VEX.vvvv selects src3.  VEX class not found
     * in manual, assumed to be 13 from the VEX.L0 constraint.
     */
    [0xe0] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe1] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe2] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe3] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe4] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe5] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe6] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe7] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),

    [0xe8] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xe9] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xea] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xeb] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xec] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xed] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xee] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
    [0xef] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
};

/* five rows for no prefix, 66, F3, F2, 66+F2  */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
    [0] = {
        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
    },
    [1] = {
        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
        {},
        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
    },
    [2] = {
        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [3] = {
        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
        {},
        {},
        {},
        {},
    },
    [5] = {
        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI2)),
        {},
        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
    [6] = {
        {},
        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
        {},
    },
    [7] = {
        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI2)),
        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI2)),
        {},
    },
};

static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    *b = x86_ldub_code(env, s);
    if (*b < 0xf0) {
        *entry = opcodes_0F38_00toEF[*b];
    } else {
        int row = 0;
        if (s->prefix & PREFIX_REPZ) {
            /* The REPZ (F3) prefix has priority over 66 */
            row = 2;
        } else {
            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
            row += s->prefix & PREFIX_DATA ? 1 : 0;
        }
        *entry = opcodes_0F38_F0toFF[*b & 15][row];
    }
}
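
/*
 * Worked example of the row computation above: 66 F2 0F 38 F1 sets both
 * PREFIX_DATA and PREFIX_REPNZ, so row = 3 + 1 = 4, the "66+F2" column;
 * this is how CRC32 with a 16-bit source operand reaches the G,d/E,w
 * entry in opcodes_0F38_F0toFF[1].
 */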

static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry
        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);

    int modrm = get_modrm(s, env);
    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
}

static const X86OpEntry opcodes_0F3A[256] = {
    /*
     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
     * Also the "qq" instructions are sometimes omitted from Table 2-17, but are
     * VEX256 only.
     */
    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),

    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),

    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) op2_Ry p_66),
    [0x21] = X86_OP_GROUP0(VINSERTPS),
    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),

    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),

    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),

    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    /*
     * Not listed as four-operand in the manual.  Also writes and reads 128 bits
     * from the first two operands due to the V operand picking higher entries of
     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
     * value of vex_special, because the table lists the operand types of VSQRTPx.
     */
    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),

    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),

    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),

    /* Listed incorrectly as type 4 */
    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),

    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),

    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),

    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};

static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    *b = x86_ldub_code(env, s);
    *entry = opcodes_0F3A[*b];
}

/*
 * There are some mistakes in the operands in the manual, and the load/store/register
 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
 * efficiency of implementation rather than copying what the manual says.
 *
 * In particular:
 *
 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
 * but this is not mentioned in the tables.
 *
 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
 * quadword of the V operand.
 */
static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F10_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F10_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
    }
}

static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F11_reg[4] = {
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
    };

    static const X86OpEntry opcodes_0F11_mem[4] = {
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
    }
}

static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F12_mem[4] = {
        /*
         * Use dq for the operand for compatibility with gen_MOVSD and
         * to allow VEX128 only.
         */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
    };
    static const X86OpEntry opcodes_0F12_reg[4] = {
        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
            entry->s2 = X86_SIZE_qq;
        }
    }
}

static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F16_mem[4] = {
        /*
         * Operand 1 technically only reads the low 64 bits, but uses dq so that
         * it is easier to check for op0 == op1 in an endianness-neutral manner.
         */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
        {},
    };
    static const X86OpEntry opcodes_0F16_reg[4] = {
        /* Same as above, operand 1 could be Hq if it weren't for big-endian.  */
        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
        {},
    };

    if ((get_modrm(s, env) >> 6) == 3) {
        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
    } else {
        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
    }
}

static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2A[4] = {
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2A);
}

static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2B[4] = {
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
        /* AMD extensions */
        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
    };

    *entry = *decode_by_prefix(s, opcodes_0F2B);
}

static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2C[4] = {
        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2C);
}

static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F2D[4] = {
        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
    };
    *entry = *decode_by_prefix(s, opcodes_0F2D);
}

static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /*
     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
     * respectively.  Scalar values are usually associated with 0xF2 and 0xF3, for
     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
     */
    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
}

static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
        entry->op1 = X86_TYPE_None;
        entry->s1 = X86_SIZE_None;
    }
    switch (*b) {
    case 0x51: entry->gen = gen_VSQRT; break;
    case 0x52: entry->gen = gen_VRSQRT; break;
    case 0x53: entry->gen = gen_VRCP; break;
    }
}

static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5A[4] = {
        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
    };
    *entry = *decode_by_prefix(s, opcodes_0F5A);
}

static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0F5B[4] = {
        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
        {},
    };
    *entry = *decode_by_prefix(s, opcodes_0F5B);
}

static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry opcodes_0FE6[4] = {
        {},
        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
    };
    *entry = *decode_by_prefix(s, opcodes_0FE6);
}
987
988static const X86OpEntry opcodes_0F[256] = {
989    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
990    /*
991     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
992     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
993     * function.
994     */
995    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
996
997    [0x10] = X86_OP_GROUP0(0F10),
998    [0x11] = X86_OP_GROUP0(0F11),
999    [0x12] = X86_OP_GROUP0(0F12),
1000    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
1001    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
1002    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
1003    [0x16] = X86_OP_GROUP0(0F16),
1004    /* Incorrectly listed as Mq,Vq in the manual */
1005    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
1006
1007    [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1008    [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1009    [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1010    [0x43] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1011    [0x44] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1012    [0x45] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1013    [0x46] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1014    [0x47] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1015
1016    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
1017    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
1018    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
1019    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
1020    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
1021    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
1022    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
1023    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
1024
1025    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1026    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1027    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1028    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1029    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1030    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1031    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1032    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1033
1034    [0x70] = X86_OP_GROUP0(0F70),
1035    [0x71] = X86_OP_GROUP0(group12),
1036    [0x72] = X86_OP_GROUP0(group13),
1037    [0x73] = X86_OP_GROUP0(group14),
1038    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1039    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1040    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1041    [0x77] = X86_OP_GROUP0(0F77),
1042
1043    [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
1044    [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
1045    [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
1046    [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
1047    [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
1048    [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
1049    [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
1050    [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),
1051
1052    [0x90] = X86_OP_ENTRYw(SETcc, E,b),
1053    [0x91] = X86_OP_ENTRYw(SETcc, E,b),
1054    [0x92] = X86_OP_ENTRYw(SETcc, E,b),
1055    [0x93] = X86_OP_ENTRYw(SETcc, E,b),
1056    [0x94] = X86_OP_ENTRYw(SETcc, E,b),
1057    [0x95] = X86_OP_ENTRYw(SETcc, E,b),
1058    [0x96] = X86_OP_ENTRYw(SETcc, E,b),
1059    [0x97] = X86_OP_ENTRYw(SETcc, E,b),
1060
1061    [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
1062    [0xa1] = X86_OP_ENTRYw(POP, FS, w),
1063
1064    [0x0b] = X86_OP_ENTRY0(UD),           /* UD2 */
1065    [0x0d] = X86_OP_ENTRY1(NOP,  M,v),    /* 3DNow! prefetch */
1066
1067    [0x18] = X86_OP_ENTRY1(NOP,  nop,v),  /* prefetch/reserved NOP */
1068    [0x19] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1069    [0x1c] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1070    [0x1d] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1071    [0x1e] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
1072    [0x1f] = X86_OP_ENTRY1(NOP,  nop,v),  /* NOP/reserved NOP */
1073
1074    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
1075    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
1076    [0x2A] = X86_OP_GROUP0(0F2A),
1077    [0x2B] = X86_OP_GROUP0(0F2B),
1078    [0x2C] = X86_OP_GROUP0(0F2C),
1079    [0x2D] = X86_OP_GROUP0(0F2D),
1080    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
1081    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */
1082
1083    [0x38] = X86_OP_GROUP0(0F38),
1084    [0x3a] = X86_OP_GROUP0(0F3A),
1085
1086    [0x48] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1087    [0x49] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1088    [0x4a] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1089    [0x4b] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1090    [0x4c] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1091    [0x4d] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1092    [0x4e] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1093    [0x4f] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
1094
1095    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1096    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1097    [0x5a] = X86_OP_GROUP0(0F5A),
1098    [0x5b] = X86_OP_GROUP0(0F5B),
1099    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1100    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1101    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1102    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1103
1104    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1105    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1106    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1107    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1108    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1109    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1110    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
1111    [0x6f] = X86_OP_GROUP0(0F6F),
1112
1113    [0x78] = X86_OP_GROUP0(0F78),
1114    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
1115    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1116    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1117    [0x7e] = X86_OP_GROUP0(0F7E),
1118    [0x7f] = X86_OP_GROUP0(0F7F),
1119
1120    [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
1121    [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
1122    [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
1123    [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
1124    [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
1125    [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
1126    [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
1127    [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),
1128
1129    [0x98] = X86_OP_ENTRYw(SETcc, E,b),
1130    [0x99] = X86_OP_ENTRYw(SETcc, E,b),
1131    [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
1132    [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
1133    [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
1134    [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
1135    [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
1136    [0x9f] = X86_OP_ENTRYw(SETcc, E,b),
1137
1138    [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
1139    [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
1140    [0xae] = X86_OP_GROUP0(group15),
1141    /*
1142     * It's slightly more efficient to put the Ev operand in T0 and allow gen_IMUL3
1143     * to assume sextT0.  Multiplication is commutative anyway.
1144     */
1145    [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
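    /*
     * Sketch of why this is safe: the low N bits of a product do not depend
     * on how the operands were extended.  For 8-bit inputs, e.g.
     * 0x90 * 0x02 = 0x0120 zero-extended, while sign-extended it is
     * (-0x70) * 0x02 = -0xe0 = 0x...ff20: the low byte is 0x20 either way.
     */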
1146
1147    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
1148    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
1149    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
1150    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
1151    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
1152
1153    /* decoded as modrm, which is visible as a difference between page fault and #UD */
1154    [0xb9] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD1 */
1155    [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
1156    [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
1157
1158    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
1159    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
1160    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
1161    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
1162    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
1163
1164    [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1165    [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1166    [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1167    [0xcb] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1168    [0xcc] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1169    [0xcd] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1170    [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1171    [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
1172
1173    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
1174    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1175    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1176    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1177    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1178    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1179    [0xd6] = X86_OP_GROUP0(0FD6),
1180    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1181
1182    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1183    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1184    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1185    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1186    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1187    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1188    [0xe6] = X86_OP_GROUP0(0FE6),
1189    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1190
1191    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1192    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1193    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1194    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1195    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1196    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1197    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1198    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1199
1200    /* Incorrectly missing from SDM Table 2-17 */
1201    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1202    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1203    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1204    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1205    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1206    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1207    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1208    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1209
1210    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1211    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1212    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1213    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1214    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1215    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1216    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1217    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1218
1219    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1220    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1221    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1222    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1223    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1224    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1225    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1226    [0xff] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD0 */
1227};
1228
1229static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1230{
1231    *entry = opcodes_0F[*b];
1232}
1233
1234static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1235{
1236    *b = x86_ldub_code(env, s);
1237    do_decode_0F(s, env, entry, b);
1238}
1239
1240static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1241{
1242    static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
1243    static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
1244    static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0);
1245    if (!CODE64(s)) {
1246        *entry = arpl;
1247    } else if (REX_W(s)) {
1248        *entry = movsxd;
1249    } else {
1250        *entry = mov;
1251    }
1252}
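
/*
 * Worked example: in 64-bit mode, 48 63 d8 is movslq %eax, %rbx (REX.W
 * selects the movsxd entry, whose sextT0 does the 32->64 extension);
 * without REX.W the same opcode is a plain 32-bit mov.  Outside 64-bit
 * mode, opcode 63 is ARPL, which chk(prot) limits to protected mode.
 */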
1253
1254static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1255{
1256    static const X86GenFunc group1_gen[8] = {
1257        gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB,
1258    };
1259    int op = (get_modrm(s, env) >> 3) & 7;
1260    entry->gen = group1_gen[op];
1261
1262    if (op == 7) {
1263        /* prevent writeback for CMP */
1264        entry->op1 = entry->op0;
1265        entry->op0 = X86_TYPE_None;
1266        entry->s0 = X86_SIZE_None;
1267    } else {
1268        entry->special = X86_SPECIAL_HasLock;
1269    }
1270}
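
/*
 * Example: 83 /7 ib is CMP Ev,Ib.  reg=7 maps to gen_SUB, and the operand
 * shuffle above moves Ev from op0 (the destination) to op1 (a plain
 * source): the subtraction still runs and sets the flags, but nothing is
 * written back.  The other seven ops keep their destination and get
 * HasLock, so a LOCK prefix is legal on their memory forms.
 */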
1271
1272static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1273{
1274    int op = (get_modrm(s, env) >> 3) & 7;
1275    if (op != 0) {
1276        /* could be XOP prefix too */
1277        *entry = UNKNOWN_OPCODE;
1278    } else {
1279        entry->gen = gen_POP;
1280        /* The address must use the value of ESP after the pop.  */
1281        s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag);
1282    }
1283}
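
/*
 * Example of why the hack is needed: in 32-bit mode, 8f 04 24 is
 * "pop (%esp)".  Architecturally the effective address is computed with
 * the ESP value *after* the pop, so the address computation must add the
 * operand size (1 << mo_pushpop, i.e. 2, 4 or 8 bytes) to the
 * not-yet-updated ESP it reads.
 */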
1284
1285static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1286{
1287    static const X86GenFunc group2_gen[8] = {
1288        gen_ROL, gen_ROR, gen_RCL, gen_RCR,
1289        gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
1290    };
1291    int op = (get_modrm(s, env) >> 3) & 7;
1292    entry->gen = group2_gen[op];
1293    if (op == 7) {
1294        entry->special = X86_SPECIAL_SExtT0;
1295    } else {
1296        entry->special = X86_SPECIAL_ZExtT0;
1297    }
1298}
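
/*
 * The extension matters because sub-word shifts run on a full-width TCG
 * register.  For the 8-bit value 0x80 shifted right by one: SHR must see
 * 0x00000080 and produce 0x40 (ZExtT0), while SAR must see 0xffffff80 and
 * produce 0xc0 (SExtT0).  Rotates and left shifts only look at the low
 * bits, so zero extension is just the safe default for them.
 */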
1299
1300static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1301{
1302    static const X86OpEntry opcodes_grp3[16] = {
1303        /* 0xf6 */
1304        [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),   /* TEST */
1305        [0x02] = X86_OP_ENTRY1(NOT,  E,b,      lock),
1306        [0x03] = X86_OP_ENTRY1(NEG,  E,b,      lock),
1307        [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
1308        [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
1309        [0x06] = X86_OP_ENTRYr(DIV,  E,b),
1310        [0x07] = X86_OP_ENTRYr(IDIV, E,b),
1311
1312        /* 0xf7 */
1313        [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),   /* TEST */
1314        [0x0a] = X86_OP_ENTRY1(NOT,  E,v,      lock),
1315        [0x0b] = X86_OP_ENTRY1(NEG,  E,v,      lock),
1316        [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
1317        [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
1318        [0x0e] = X86_OP_ENTRYr(DIV,  E,v),
1319        [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
1320    };
1321
1322    int w = (*b & 1);
1323    int reg = (get_modrm(s, env) >> 3) & 7;
1324
1325    *entry = opcodes_grp3[(w << 3) | reg];
1326}
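
/*
 * Worked example: f7 d8 has w=1 (opcode bit 0) and reg=3, giving index
 * (1 << 3) | 3 = 0x0b, i.e. NEG Ev -- negl %eax for that encoding.
 * Indices 0x01 and 0x09 are deliberately left empty, so the undocumented
 * reg=1 forms of f6/f7 decode as unknown opcodes.
 */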
1327
1328static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1329{
1330    static const X86OpEntry opcodes_grp4_5[16] = {
1331        /* 0xfe */
1332        [0x00] = X86_OP_ENTRY1(INC,     E,b,                           lock),
1333        [0x01] = X86_OP_ENTRY1(DEC,     E,b,                           lock),
1334
1335        /* 0xff */
1336        [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
1337        [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
1338        [0x0a] = X86_OP_ENTRY3(CALL_m,  None, None, E,f64, None, None, zextT0),
1339        [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
1340        [0x0c] = X86_OP_ENTRY3(JMP_m,   None, None, E,f64, None, None, zextT0),
1341        [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
1342        [0x0e] = X86_OP_ENTRYr(PUSH,    E,f64),
1343    };
1344
1345    int w = (*b & 1);
1346    int reg = (get_modrm(s, env) >> 3) & 7;
1347
1348    *entry = opcodes_grp4_5[(w << 3) | reg];
1349}
1350
1352static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1353{
1354    int op = (get_modrm(s, env) >> 3) & 7;
1355    if (op != 0) {
1356        *entry = UNKNOWN_OPCODE;
1357    } else {
1358        entry->gen = gen_MOV;
1359    }
1360}
1361
1362static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1363{
1364    static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
1365    static X86OpEntry nop = X86_OP_ENTRY0(NOP);
1366    static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
1367
1368    if (REX_B(s)) {
1369        *entry = xchg_ax;
1370    } else {
1371        *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
1372    }
1373}
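
/*
 * Opcode 90 needs this care because plain "xchg %eax, %eax" must stay a
 * true no-op: a real 32-bit XCHG in 64-bit mode would zero the upper half
 * of RAX.  Examples: 90 = NOP, f3 90 = PAUSE, 41 90 = xchg %r8d, %eax
 * (REX.B makes the LoBits operand r8, so it is a normal XCHG).
 */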
1374
1375static const X86OpEntry opcodes_root[256] = {
1376    [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
1377    [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
1378    [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
1379    [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
1380    [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock),   /* AL, Ib */
1381    [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock),   /* rAX, Iz */
1382    [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
1383    [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
1384
1385    [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
1386    [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
1387    [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
1388    [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
1389    [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock),   /* AL, Ib */
1390    [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock),   /* rAX, Iz */
1391    [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
1392    [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
1393
1394    [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
1395    [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
1396    [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
1397    [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
1398    [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock),   /* AL, Ib */
1399    [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock),   /* rAX, Iz */
1400    [0x26] = {},
1401    [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
1402
1403    [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
1404    [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
1405    [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
1406    [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
1407    [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock),   /* AL, Ib */
1408    [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock),   /* rAX, Iz */
1409    [0x36] = {},
1410    [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
1411
1412    [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
1413    [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
1414    [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
1415    [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
1416    [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
1417    [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
1418    [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
1419    [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
1420
1421    [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1422    [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1423    [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1424    [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1425    [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1426    [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1427    [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1428    [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
1429
1430    [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
1431    [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
1432    [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
1433    [0x63] = X86_OP_GROUP0(63),
1434    [0x64] = {},
1435    [0x65] = {},
1436    [0x66] = {},
1437    [0x67] = {},
1438
1439    [0x70] = X86_OP_ENTRYr(Jcc, J,b),
1440    [0x71] = X86_OP_ENTRYr(Jcc, J,b),
1441    [0x72] = X86_OP_ENTRYr(Jcc, J,b),
1442    [0x73] = X86_OP_ENTRYr(Jcc, J,b),
1443    [0x74] = X86_OP_ENTRYr(Jcc, J,b),
1444    [0x75] = X86_OP_ENTRYr(Jcc, J,b),
1445    [0x76] = X86_OP_ENTRYr(Jcc, J,b),
1446    [0x77] = X86_OP_ENTRYr(Jcc, J,b),
1447
1448    [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
1449    [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
1450    [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
1451    [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
1452    [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),   /* TEST */
1453    [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),   /* TEST */
1454    [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
1455    [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
1456
1457    [0x90] = X86_OP_GROUP0(90),
1458    [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1459    [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1460    [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1461    [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1462    [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1463    [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1464    [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
1465
1466    [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
1467    [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
1468    [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
1469    [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
1470    [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
1471    [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
1472    [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
1473    [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
1474
1475    [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1476    [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1477    [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1478    [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1479    [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1480    [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1481    [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1482    [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
1483
1484    [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
1485    [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
1486    [0xC2] = X86_OP_ENTRYr(RET, I,w),
1487    [0xC3] = X86_OP_ENTRY0(RET),
1488    [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)),
1489    [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, chk(i64)),
1490    [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
1491    [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
1492
1493    [0xD0] = X86_OP_GROUP1(group2, E,b),
1494    [0xD1] = X86_OP_GROUP1(group2, E,v),
1495    [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
1496    [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
1497    [0xD4] = X86_OP_ENTRY2(AAM, 0,w, I,b),
1498    [0xD5] = X86_OP_ENTRY2(AAD, 0,w, I,b),
1499    [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
1500    [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
1501
1502    [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
1503    [0xE1] = X86_OP_ENTRYr(LOOPE,  J,b), /* implicit: CX with aflag size */
1504    [0xE2] = X86_OP_ENTRYr(LOOP,   J,b), /* implicit: CX with aflag size */
1505    [0xE3] = X86_OP_ENTRYr(JCXZ,   J,b), /* implicit: CX with aflag size */
1506    [0xE4] = X86_OP_ENTRYwr(IN,    0,b, I_unsigned,b), /* AL */
1507    [0xE5] = X86_OP_ENTRYwr(IN,    0,v, I_unsigned,b), /* AX/EAX */
1508    [0xE6] = X86_OP_ENTRYrr(OUT,   0,b, I_unsigned,b), /* AL */
1509    [0xE7] = X86_OP_ENTRYrr(OUT,   0,v, I_unsigned,b), /* AX/EAX */
1510
1511    [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
1512    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0) svm(HLT)),
1513    [0xF5] = X86_OP_ENTRY0(CMC),
1514    [0xF6] = X86_OP_GROUP1(group3, E,b),
1515    [0xF7] = X86_OP_GROUP1(group3, E,v),
1516
1517    [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
1518    [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
1519    [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
1520    [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
1521    [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock),   /* AL, Ib */
1522    [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock),   /* rAX, Iz */
1523    [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
1524    [0x0F] = X86_OP_GROUP0(0F),
1525
1526    [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
1527    [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
1528    [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
1529    [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
1530    [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock),   /* AL, Ib */
1531    [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock),   /* rAX, Iz */
1532    [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
1533    [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
1534
1535    [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
1536    [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
1537    [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
1538    [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
1539    [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock),   /* AL, Ib */
1540    [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock),   /* rAX, Iz */
1541    [0x2E] = {},
1542    [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
1543
1544    [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
1545    [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
1546    [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
1547    [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
1548    [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b),   /* AL, Ib */
1549    [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z),   /* rAX, Iz */
1550    [0x3E] = {},
1551    [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
1552
1553    [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
1554    [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
1555    [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
1556    [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
1557    [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
1558    [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
1559    [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
1560    [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
1561
1562    [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
1563    [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
1564    [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
1565    [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
1566    [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
1567    [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
1568    [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
1569    [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
1570
1571    [0x68] = X86_OP_ENTRYr(PUSH, I,z),
1572    [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
1573    [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
1574    [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
1575    [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
1576    [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
1577    [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
1578    [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
1579
1580    [0x78] = X86_OP_ENTRYr(Jcc, J,b),
1581    [0x79] = X86_OP_ENTRYr(Jcc, J,b),
1582    [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
1583    [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
1584    [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
1585    [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
1586    [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
1587    [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
1588
1589    [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
1590    [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
1591    [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
1592    [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
1593    /* Missing in Table A-2: memory destination is always 16-bit.  */
1594    [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw),
1595    [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
1596    [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None),
1597    [0x8F] = X86_OP_GROUPw(group1A, E,v),
1598
1599    [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
1600    [0x99] = X86_OP_ENTRY3(CWD,    2,v, 0,v, None, None), /* rDX, rAX */
1601    [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
1602    [0x9B] = X86_OP_ENTRY0(WAIT),
1603    [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
1604    [0x9D] = X86_OP_ENTRY0(POPF,   chk(vm86_iopl) svm(POPF)),
1605    [0x9E] = X86_OP_ENTRY0(SAHF),
1606    [0x9F] = X86_OP_ENTRY0(LAHF),
1607
1608    [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* TEST AL, Ib */
1609    [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* TEST rAX, Iz */
1610    [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
1611    [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
1612    /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
1613    [0xAC] = X86_OP_ENTRYr(LODS, X,b),
1614    [0xAD] = X86_OP_ENTRYr(LODS, X,v),
1615    [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
1616    [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
1617
1618    [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1619    [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1620    [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1621    [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1622    [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1623    [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1624    [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1625    [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
1626
1627    [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
1628    [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
1629    [0xCA] = X86_OP_ENTRYr(RETF,  I,w),
1630    [0xCB] = X86_OP_ENTRY0(RETF),
1631    [0xCC] = X86_OP_ENTRY0(INT3),
1632    [0xCD] = X86_OP_ENTRYr(INT, I,b,  chk(vm86_iopl)),
1633    [0xCE] = X86_OP_ENTRY0(INTO),
1634    [0xCF] = X86_OP_ENTRY0(IRET,      chk(vm86_iopl) svm(IRET)),
1635
1636    [0xE8] = X86_OP_ENTRYr(CALL,   J,z_f64),
1637    [0xE9] = X86_OP_ENTRYr(JMP,    J,z_f64),
1638    [0xEA] = X86_OP_ENTRYrr(JMPF,  I_unsigned,p, I_unsigned,w, chk(i64)),
1639    [0xEB] = X86_OP_ENTRYr(JMP,    J,b),
1640    [0xEC] = X86_OP_ENTRYwr(IN,    0,b, 2,w), /* AL, DX */
1641    [0xED] = X86_OP_ENTRYwr(IN,    0,v, 2,w), /* AX/EAX, DX */
1642    [0xEE] = X86_OP_ENTRYrr(OUT,   0,b, 2,w), /* DX, AL */
1643    [0xEF] = X86_OP_ENTRYrr(OUT,   0,v, 2,w), /* DX, AX/EAX */
1644
1645    [0xF8] = X86_OP_ENTRY0(CLC),
1646    [0xF9] = X86_OP_ENTRY0(STC),
1647    [0xFA] = X86_OP_ENTRY0(CLI,    chk(iopl)),
1648    [0xFB] = X86_OP_ENTRY0(STI,    chk(iopl)),
1649    [0xFC] = X86_OP_ENTRY0(CLD),
1650    [0xFD] = X86_OP_ENTRY0(STD),
1651    [0xFE] = X86_OP_GROUP1(group4_5, E,b),
1652    [0xFF] = X86_OP_GROUP1(group4_5, E,v),
1653};
1654
1655#undef mmx
1656#undef vex1
1657#undef vex2
1658#undef vex3
1659#undef vex4
1660#undef vex4_unal
1661#undef vex5
1662#undef vex6
1663#undef vex7
1664#undef vex8
1665#undef vex11
1666#undef vex12
1667#undef vex13
1668
1669/*
1670 * Decode the fixed part of the opcode and place the last
1671 * byte of the opcode in b.
1672 */
1673static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1674{
1675    *entry = opcodes_root[*b];
1676}
1677
1679static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1680                        X86DecodedOp *op, X86OpType type)
1681{
1682    int modrm = get_modrm(s, env);
1683    if ((modrm >> 6) == 3) {
1684        op->n = (modrm & 7);
1685        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1686            op->n |= REX_B(s);
1687        }
1688    } else {
1689        op->has_ea = true;
1690        op->n = -1;
1691        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1692    }
1693    return modrm;
1694}
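
/*
 * For reference, modrm is mm_rrr_nnn (mod, reg, r/m).  Example: c3 has
 * mod=3, so the operand is register rm=3 (RBX, or R11 once REX.B adds
 * bit 3); MMX operands (Q/N) skip the REX extension because there are
 * only eight MMX registers.  For mod != 3, only the address parts are
 * recorded here; the actual load or store happens later in common code.
 */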
1695
1696static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1697{
1698    switch (size) {
1699    case X86_SIZE_b:  /* byte */
1700        *ot = MO_8;
1701        return true;
1702
1703    case X86_SIZE_d:  /* 32-bit */
1704    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1705        *ot = MO_32;
1706        return true;
1707
1708    case X86_SIZE_p:  /* Far pointer, return offset size */
1709    case X86_SIZE_s:  /* Descriptor, return offset size */
1710    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1711        *ot = s->dflag;
1712        return true;
1713
1714    case X86_SIZE_pi: /* MMX */
1715    case X86_SIZE_q:  /* 64-bit */
1716    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1717        *ot = MO_64;
1718        return true;
1719
1720    case X86_SIZE_w:  /* 16-bit */
1721        *ot = MO_16;
1722        return true;
1723
1724    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1725        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1726        return true;
1727
1728    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1729        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1730        return true;
1731
1732    case X86_SIZE_z_f64:  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
1733        *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
1734        return true;
1735
1736    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1737        if (e->special == X86_SPECIAL_MMX &&
1738            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1739            *ot = MO_64;
1740            return true;
1741        }
1742        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1743            return false;
1744        }
1745        *ot = MO_128;
1746        return true;
1747
1748    case X86_SIZE_qq: /* AVX 256-bit */
1749        if (!s->vex_l) {
1750            return false;
1751        }
1752        *ot = MO_256;
1753        return true;
1754
1755    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1756        if (e->special == X86_SPECIAL_MMX &&
1757            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1758            *ot = MO_64;
1759            return true;
1760        }
1761        /* fall through */
1762    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1763    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1764        *ot = s->vex_l ? MO_256 : MO_128;
1765        return true;
1766
1767    case X86_SIZE_xh: /* SSE/AVX packed half register */
1768        *ot = s->vex_l ? MO_128 : MO_64;
1769        return true;
1770
1771    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1772        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1773        return true;
1774
1775    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1776        *ot = CODE64(s) ? MO_64 : s->dflag;
1777        return true;
1778
1779    default:
1780        *ot = -1;
1781        return true;
1782    }
1783}
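
/*
 * A few worked mappings, given the prefix handling in disas_insn() below:
 * in 32-bit mode with a 66 prefix, "v" and "z" are both MO_16 while "y"
 * stays MO_32; in 64-bit mode with REX.W, "v" and "y" are MO_64 but "z"
 * saturates at MO_32 (e.g. the immediate of f7 /0 TEST is still 4 bytes,
 * sign-extended, under a REX.W prefix).
 */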
1784
1785static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1786                      X86DecodedOp *op, X86OpType type, int b)
1787{
1788    int modrm;
1789
1790    switch (type) {
1791    case X86_TYPE_None:  /* Implicit or absent */
1792    case X86_TYPE_A:  /* Implicit */
1793    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1794    case X86_TYPE_X:  /* string source */
1795    case X86_TYPE_Y:  /* string destination */
1796        break;
1797
1798    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1799        op->unit = X86_OP_INT;
1800        op->n = s->vex_v;
1801        break;
1802
1803    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1804        op->unit = X86_OP_CR;
1805        goto get_reg;
1806
1807    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1808        op->unit = X86_OP_DR;
1809        goto get_reg;
1810
1811    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1812        op->unit = X86_OP_INT;
1813        goto get_reg;
1814
1815    case X86_TYPE_S:  /* reg selects a segment register */
1816        op->unit = X86_OP_SEG;
1817        goto get_reg;
1818
1819    case X86_TYPE_P:
1820        op->unit = X86_OP_MMX;
1821        goto get_reg;
1822
1823    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1824        if (decode->e.special == X86_SPECIAL_MMX &&
1825            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1826            op->unit = X86_OP_MMX;
1827        } else {
1828            op->unit = X86_OP_SSE;
1829        }
1830    get_reg:
1831        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1832        break;
1833
1834    case X86_TYPE_E:  /* ALU modrm operand */
1835        op->unit = X86_OP_INT;
1836        goto get_modrm;
1837
1838    case X86_TYPE_Q:  /* MMX modrm operand */
1839        op->unit = X86_OP_MMX;
1840        goto get_modrm;
1841
1842    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1843        if (decode->e.special == X86_SPECIAL_MMX &&
1844            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1845            op->unit = X86_OP_MMX;
1846        } else {
1847            op->unit = X86_OP_SSE;
1848        }
1849        goto get_modrm;
1850
1851    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1852        op->unit = X86_OP_MMX;
1853        goto get_modrm_reg;
1854
1855    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1856        if (decode->e.special == X86_SPECIAL_MMX &&
1857            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1858            op->unit = X86_OP_MMX;
1859        } else {
1860            op->unit = X86_OP_SSE;
1861        }
1862        goto get_modrm_reg;
1863
1864    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1865        op->unit = X86_OP_INT;
1866    get_modrm_reg:
1867        modrm = get_modrm(s, env);
1868        if ((modrm >> 6) != 3) {
1869            return false;
1870        }
1871        goto get_modrm;
1872
1873    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1874        op->unit = X86_OP_SSE;
1875        goto get_modrm_mem;
1876
1877    case X86_TYPE_EM:  /* modrm byte selects an ALU memory operand */
1878        op->unit = X86_OP_INT;
1879        /* fall through */
1880    case X86_TYPE_M:  /* modrm byte selects a memory operand */
1881    get_modrm_mem:
1882        modrm = get_modrm(s, env);
1883        if ((modrm >> 6) == 3) {
1884            return false;
1885        }
1886        /* fall through */
1887    case X86_TYPE_nop:  /* modrm operand decoded but not fetched */
1888    get_modrm:
1889        decode_modrm(s, env, decode, op, type);
1890        break;
1891
1892    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1893        op->unit = X86_OP_INT;
1894        op->has_ea = true;
1895        op->n = -1;
1896        decode->mem = (AddressParts) {
1897            .def_seg = R_DS,
1898            .base = -1,
1899            .index = -1,
1900            .disp = insn_get_addr(env, s, s->aflag)
1901        };
1902        break;
1903
1904    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1905        if ((s->prefix & PREFIX_VEX)) {
1906            op->unit = X86_OP_SSE;
1907            op->n = s->vex_v;
1908            break;
1909        }
1910        if (op == &decode->op[0]) {
1911            /* shifts place the destination in VEX.vvvv, use modrm */
1912            return decode_op(s, env, decode, op, decode->e.op1, b);
1913        } else {
1914            return decode_op(s, env, decode, op, decode->e.op0, b);
1915        }
1916
1917    case X86_TYPE_I:  /* Immediate */
1918    case X86_TYPE_J:  /* Relative offset for a jump */
1919        op->unit = X86_OP_IMM;
1920        decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
1921        break;
1922
1923    case X86_TYPE_I_unsigned:  /* Immediate */
1924        op->unit = X86_OP_IMM;
1925        decode->immediate = op->imm = insn_get(env, s, op->ot);
1926        break;
1927
1928    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1929        op->n = insn_get(env, s, op->ot) >> 4;
1930        break;
1931
1932    case X86_TYPE_2op:
1933        *op = decode->op[0];
1934        break;
1935
1936    case X86_TYPE_LoBits:
1937        op->n = (b & 7) | REX_B(s);
1938        op->unit = X86_OP_INT;
1939        break;
1940
1941    case X86_TYPE_0 ... X86_TYPE_7:
1942        op->n = type - X86_TYPE_0;
1943        op->unit = X86_OP_INT;
1944        break;
1945
1946    case X86_TYPE_ES ... X86_TYPE_GS:
1947        op->n = type - X86_TYPE_ES;
1948        op->unit = X86_OP_SEG;
1949        break;
1950    }
1951
1952    return true;
1953}
1954
1955static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1956{
1957    uint16_t sse_prefixes;
1958
1959    if (!e->valid_prefix) {
1960        return true;
1961    }
1962    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1963        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1964        s->prefix &= ~PREFIX_DATA;
1965    }
1966
1967    /* Now, either zero or one bit is set in sse_prefixes.  */
1968    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1969    return e->valid_prefix & (1 << sse_prefixes);
1970}
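
/*
 * Worked example, using the PREFIX_* encoding from translate.c
 * (REPZ=0x01, REPNZ=0x02, DATA=0x08): for f2 0f 58 (addsd), sse_prefixes
 * is PREFIX_REPNZ = 2 and a p_00_66_f3_f2 entry has bit (1 << 2) set, so
 * the entry is accepted.  For 66 f2 0f 58 the DATA bit is stripped first,
 * so the result is the same F2 form rather than an impossible 66+F2 index.
 */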
1971
1972static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1973                        X86DecodedInsn *decode)
1974{
1975    X86OpEntry *e = &decode->e;
1976
1977    decode_func(s, env, e, &decode->b);
1978    while (e->is_decode) {
1979        e->is_decode = false;
1980        e->decode(s, env, e, &decode->b);
1981    }
1982
1983    if (!validate_sse_prefix(s, e)) {
1984        return false;
1985    }
1986
1987    /* First compute size of operands in order to initialize s->rip_offset.  */
1988    if (e->op0 != X86_TYPE_None) {
1989        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1990            return false;
1991        }
1992        if (e->op0 == X86_TYPE_I) {
1993            s->rip_offset += 1 << decode->op[0].ot;
1994        }
1995    }
1996    if (e->op1 != X86_TYPE_None) {
1997        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1998            return false;
1999        }
2000        if (e->op1 == X86_TYPE_I) {
2001            s->rip_offset += 1 << decode->op[1].ot;
2002        }
2003    }
2004    if (e->op2 != X86_TYPE_None) {
2005        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
2006            return false;
2007        }
2008        if (e->op2 == X86_TYPE_I) {
2009            s->rip_offset += 1 << decode->op[2].ot;
2010        }
2011    }
2012    if (e->op3 != X86_TYPE_None) {
2013        /*
2014         * A couple instructions actually use the extra immediate byte for an Lx
2015         * register operand; those are handled in the gen_* functions as one off.
2016         * register operand; those are handled in the gen_* functions as one-offs.
2017        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
2018        s->rip_offset += 1;
2019    }
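
    /*
     * The bookkeeping above matters for RIP-relative operands: the
     * displacement is relative to the *end* of the instruction, but the
     * memory operand is decoded before the immediate is fetched.  E.g.
     * for 81 05 <disp32> <imm32> (addl $imm, disp(%rip)), rip_offset = 4
     * accounts for the immediate bytes that have not been read yet.
     */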
2020
2021    if (e->op0 != X86_TYPE_None &&
2022        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
2023        return false;
2024    }
2025
2026    if (e->op1 != X86_TYPE_None &&
2027        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
2028        return false;
2029    }
2030
2031    if (e->op2 != X86_TYPE_None &&
2032        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
2033        return false;
2034    }
2035
2036    if (e->op3 != X86_TYPE_None) {
2037        decode->immediate = insn_get_signed(env, s, MO_8);
2038    }
2039
2040    return true;
2041}
2042
2043static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
2044{
2045    switch (cpuid) {
2046    case X86_FEAT_None:
2047        return true;
2048    case X86_FEAT_CMOV:
2049        return (s->cpuid_features & CPUID_CMOV);
2050    case X86_FEAT_F16C:
2051        return (s->cpuid_ext_features & CPUID_EXT_F16C);
2052    case X86_FEAT_FMA:
2053        return (s->cpuid_ext_features & CPUID_EXT_FMA);
2054    case X86_FEAT_MOVBE:
2055        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
2056    case X86_FEAT_PCLMULQDQ:
2057        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
2058    case X86_FEAT_SSE:
2059        return (s->cpuid_features & CPUID_SSE);
2060    case X86_FEAT_SSE2:
2061        return (s->cpuid_features & CPUID_SSE2);
2062    case X86_FEAT_SSE3:
2063        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
2064    case X86_FEAT_SSSE3:
2065        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
2066    case X86_FEAT_SSE41:
2067        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
2068    case X86_FEAT_SSE42:
2069        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
2070    case X86_FEAT_AES:
2071        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
2072            return false;
2073        } else if (!(s->prefix & PREFIX_VEX)) {
2074            return true;
2075        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
2076            return false;
2077        } else {
2078            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
2079        }
2080
2081    case X86_FEAT_AVX:
2082        return (s->cpuid_ext_features & CPUID_EXT_AVX);
2083
2084    case X86_FEAT_3DNOW:
2085        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
2086    case X86_FEAT_SSE4A:
2087        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
2088
2089    case X86_FEAT_ADX:
2090        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
2091    case X86_FEAT_BMI1:
2092        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
2093    case X86_FEAT_BMI2:
2094        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
2095    case X86_FEAT_AVX2:
2096        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
2097    case X86_FEAT_SHA_NI:
2098        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
2099
2100    case X86_FEAT_CMPCCXADD:
2101        return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
2102    }
2103    g_assert_not_reached();
2104}
2105
2106static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
2107{
2108    X86OpEntry *e = &decode->e;
2109
2110    switch (e->vex_special) {
2111    case X86_VEX_REPScalar:
2112        /*
2113         * Instructions which differ between 00/66 and F2/F3 in the
2114         * exception classification and the size of the memory operand.
2115         */
2116        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
2117        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
2118            e->vex_class = e->vex_class < 4 ? 3 : 5;
2119            if (s->vex_l) {
2120                goto illegal;
2121            }
2122            assert(decode->e.s2 == X86_SIZE_x);
2123            if (decode->op[2].has_ea) {
2124                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
2125            }
2126        }
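        /*
         * Concretely: 0f 58 is ADDPS, a full 16/32-byte vector access,
         * while f3 0f 58 is ADDSS, which touches only 4 bytes of memory;
         * hence the move to the scalar exception classes (3/5) and, for
         * memory operands, the narrower ot.
         */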
2127        break;
2128
2129    case X86_VEX_SSEUnaligned:
2130        /* handled in sse_needs_alignment.  */
2131        break;
2132
2133    case X86_VEX_AVX2_256:
2134        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
2135            goto illegal;
2136        }
2137    }
2138
2139    switch (e->vex_class) {
2140    case 0:
2141        if (s->prefix & PREFIX_VEX) {
2142            goto illegal;
2143        }
2144        return true;
2145    case 1:
2146    case 2:
2147    case 3:
2148    case 4:
2149    case 5:
2150    case 7:
2151        if (s->prefix & PREFIX_VEX) {
2152            if (!(s->flags & HF_AVX_EN_MASK)) {
2153                goto illegal;
2154            }
2155        } else if (e->special != X86_SPECIAL_MMX ||
2156                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2157            if (!(s->flags & HF_OSFXSR_MASK)) {
2158                goto illegal;
2159            }
2160        }
2161        break;
2162    case 12:
2163        /* Must have a VSIB byte and no address prefix.  */
2164        assert(s->has_modrm);
2165        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
2166            goto illegal;
2167        }
2168
2169        /* Check no overlap between registers.  */
2170        if (!decode->op[0].has_ea &&
2171            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
2172            goto illegal;
2173        }
2174        assert(!decode->op[1].has_ea);
2175        if (decode->op[1].n == decode->mem.index) {
2176            goto illegal;
2177        }
2178        if (!decode->op[2].has_ea &&
2179            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
2180            goto illegal;
2181        }
2182        /* fall through */
2183    case 6:
2184    case 11:
2185        if (!(s->prefix & PREFIX_VEX)) {
2186            goto illegal;
2187        }
2188        if (!(s->flags & HF_AVX_EN_MASK)) {
2189            goto illegal;
2190        }
2191        break;
2192    case 8:
2193        /* Non-VEX case handled in decode_0F77.  */
2194        assert(s->prefix & PREFIX_VEX);
2195        if (!(s->flags & HF_AVX_EN_MASK)) {
2196            goto illegal;
2197        }
2198        break;
2199    case 13:
2200        if (!(s->prefix & PREFIX_VEX)) {
2201            goto illegal;
2202        }
2203        if (s->vex_l) {
2204            goto illegal;
2205        }
2206        /* All integer instructions use VEX.vvvv, so exit.  */
2207        return true;
2208    }
2209
2210    if (s->vex_v != 0 &&
2211        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
2212        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
2213        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
2214        goto illegal;
2215    }
2216
2217    if (s->flags & HF_TS_MASK) {
2218        goto nm_exception;
2219    }
2220    if (s->flags & HF_EM_MASK) {
2221        goto illegal;
2222    }
2223
2224    if (e->check) {
2225        if (e->check & X86_CHECK_VEX128) {
2226            if (s->vex_l) {
2227                goto illegal;
2228            }
2229        }
2230        if (e->check & X86_CHECK_W0) {
2231            if (s->vex_w) {
2232                goto illegal;
2233            }
2234        }
2235        if (e->check & X86_CHECK_W1) {
2236            if (!s->vex_w) {
2237                goto illegal;
2238            }
2239        }
2240    }
2241    return true;
2242
2243nm_exception:
2244    gen_NM_exception(s);
2245    return false;
2246illegal:
2247    gen_illegal_opcode(s);
2248    return false;
2249}
2250
2251/*
2252 * Convert one instruction. s->base.is_jmp is set if the translation must
2253 * be stopped.
2254 */
2255static void disas_insn(DisasContext *s, CPUState *cpu)
2256{
2257    CPUX86State *env = cpu_env(cpu);
2258    X86DecodedInsn decode;
2259    X86DecodeFunc decode_func = decode_root;
2260    uint8_t cc_live, b;
2261
2262    s->pc = s->base.pc_next;
2263    s->override = -1;
2264    s->popl_esp_hack = 0;
2265#ifdef TARGET_X86_64
2266    s->rex_r = 0;
2267    s->rex_x = 0;
2268    s->rex_b = 0;
2269#endif
2270    s->rip_offset = 0; /* for relative ip address */
2271    s->vex_l = 0;
2272    s->vex_v = 0;
2273    s->vex_w = false;
2274    s->has_modrm = false;
2275    s->prefix = 0;
2276
2277 next_byte:
2278    b = x86_ldub_code(env, s);
2279
2280    /* Collect prefixes.  */
2281    switch (b) {
2282    case 0xf3:
2283        s->prefix |= PREFIX_REPZ;
2284        s->prefix &= ~PREFIX_REPNZ;
2285        goto next_byte;
2286    case 0xf2:
2287        s->prefix |= PREFIX_REPNZ;
2288        s->prefix &= ~PREFIX_REPZ;
2289        goto next_byte;
2290    case 0xf0:
2291        s->prefix |= PREFIX_LOCK;
2292        goto next_byte;
2293    case 0x2e:
2294        s->override = R_CS;
2295        goto next_byte;
2296    case 0x36:
2297        s->override = R_SS;
2298        goto next_byte;
2299    case 0x3e:
2300        s->override = R_DS;
2301        goto next_byte;
2302    case 0x26:
2303        s->override = R_ES;
2304        goto next_byte;
2305    case 0x64:
2306        s->override = R_FS;
2307        goto next_byte;
2308    case 0x65:
2309        s->override = R_GS;
2310        goto next_byte;
2311    case 0x66:
2312        s->prefix |= PREFIX_DATA;
2313        goto next_byte;
2314    case 0x67:
2315        s->prefix |= PREFIX_ADR;
2316        goto next_byte;
2317#ifdef TARGET_X86_64
2318    case 0x40 ... 0x4f:
2319        if (CODE64(s)) {
2320            /* REX prefix */
2321            s->prefix |= PREFIX_REX;
2322            s->vex_w = (b >> 3) & 1;
2323            s->rex_r = (b & 0x4) << 1;
2324            s->rex_x = (b & 0x2) << 2;
2325            s->rex_b = (b & 0x1) << 3;
2326            goto next_byte;
2327        }
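        /*
         * For reference, REX is 0100WRXB; R/X/B are stored pre-shifted
         * to bit 3 so they OR directly into a 3-bit register field.
         * E.g. 44 01 c0 is addl %r8d, %eax: REX.R = (0x44 & 4) << 1 = 8
         * lifts the modrm reg field from 0 to 8.
         */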
2328        break;
2329#endif
2330    case 0xc5: /* 2-byte VEX */
2331    case 0xc4: /* 3-byte VEX */
2332        /*
2333         * VEX prefixes cannot be used except in 32-bit mode.
2334         * Otherwise the instruction is LES or LDS.
2335         */
2336        if (CODE32(s) && !VM86(s)) {
2337            static const int pp_prefix[4] = {
2338                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
2339            };
2340            int vex3, vex2 = x86_ldub_code(env, s);
2341
2342            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
2343                /*
2344                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
2345                 * otherwise the instruction is LES or LDS.
2346                 */
2347                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
2348                break;
2349            }
2350
2351            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
2352            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
2353                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
2354                goto illegal_op;
2355            }
2356#ifdef TARGET_X86_64
2357            s->rex_r = (~vex2 >> 4) & 8;
2358#endif
2359            if (b == 0xc5) {
2360                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
2361                vex3 = vex2;
2362                decode_func = decode_0F;
2363            } else {
2364                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
2365                vex3 = x86_ldub_code(env, s);
2366#ifdef TARGET_X86_64
2367                s->rex_x = (~vex2 >> 3) & 8;
2368                s->rex_b = (~vex2 >> 2) & 8;
2369#endif
2370                s->vex_w = (vex3 >> 7) & 1;
2371                switch (vex2 & 0x1f) {
2372                case 0x01: /* Implied 0f leading opcode bytes.  */
2373                    decode_func = decode_0F;
2374                    break;
2375                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
2376                    decode_func = decode_0F38;
2377                    break;
2378                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
2379                    decode_func = decode_0F3A;
2380                    break;
2381                default:   /* Reserved for future use.  */
2382                    goto unknown_op;
2383                }
2384            }
2385            s->vex_v = (~vex3 >> 3) & 0xf;
2386            s->vex_l = (vex3 >> 2) & 1;
2387            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
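
            /*
             * Worked example: c4 e2 79 18 00 is vbroadcastss (%rax), %xmm0.
             * vex2 = e2: inverted R/X/B bits all set (so no extension),
             * m-mmmm = 00010 -> 0f 38 map; vex3 = 79: W = 0,
             * vvvv = ~1111 = 0 (unused here), L = 0 (128-bit),
             * pp = 01 -> implied 66 prefix.
             */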
2388        }
2389        break;
2390    default:
2391        break;
2392    }
2393
2394    /* Post-process prefixes.  */
2395    if (CODE64(s)) {
2396        /*
2397         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
2398         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
2399         * over 0x66 if both are present.
2400         */
2401        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
2402        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
2403        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
2404    } else {
2405        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
2406        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
2407            s->dflag = MO_32;
2408        } else {
2409            s->dflag = MO_16;
2410        }
2411        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
2412        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
2413            s->aflag = MO_32;
2414        } else {
2415            s->aflag = MO_16;
2416        }
2417    }
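
    /*
     * Example of the precedence rules: 66 48 01 d8 in 64-bit mode is
     * addq %rbx, %rax -- REX.W wins and the 66 prefix is ignored.  In
     * 32-bit mode, 66 01 d8 is addw %bx, %ax; the same bytes in 16-bit
     * mode give addl %ebx, %eax.
     */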
2418
2419    /* Go back to old decoder for unconverted opcodes.  */
2420    if (!(s->prefix & PREFIX_VEX)) {
2421        if ((b & ~7) == 0xd8) {
2422            if (!disas_insn_x87(s, cpu, b)) {
2423                goto unknown_op;
2424            }
2425            return;
2426        }
2427
2428        if (b == 0x0f) {
2429            b = x86_ldub_code(env, s);
2430            switch (b) {
2431            case 0x00 ... 0x03: /* mostly privileged instructions */
2432            case 0x05 ... 0x09: /* syscall, clts, sysret, invd, wbinvd */
2433            case 0x1a ... 0x1b: /* MPX */
2434            case 0x20 ... 0x23: /* mov from/to CR and DR */
2435            case 0x30 ... 0x35: /* more privileged instructions */
2436            case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
2437            case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
2438            case 0xb0 ... 0xb1: /* cmpxchg */
2439            case 0xb3:          /* btr */
2440            case 0xb8:          /* integer ops */
2441            case 0xba ... 0xbd: /* integer ops */
2442            case 0xc0 ... 0xc1: /* xadd */
2443            case 0xc7:          /* grp9 */
2444                disas_insn_old(s, cpu, b + 0x100);
2445                return;
2446            default:
2447                decode_func = do_decode_0F;
2448                break;
2449            }
2450        }
2451    }
2452
2453    memset(&decode, 0, sizeof(decode));
2454    decode.cc_op = -1;
2455    decode.b = b;
2456    if (!decode_insn(s, env, decode_func, &decode)) {
2457        goto illegal_op;
2458    }
2459    if (!decode.e.gen) {
2460        goto unknown_op;
2461    }
2462
2463    if (!has_cpuid_feature(s, decode.e.cpuid)) {
2464        goto illegal_op;
2465    }
2466
2467    /* Checks that result in #UD come first.  */
2468    if (decode.e.check) {
2469        if (decode.e.check & X86_CHECK_i64) {
2470            if (CODE64(s)) {
2471                goto illegal_op;
2472            }
2473        }
2474        if (decode.e.check & X86_CHECK_o64) {
2475            if (!CODE64(s)) {
2476                goto illegal_op;
2477            }
2478        }
2479        if (decode.e.check & X86_CHECK_prot) {
2480            if (!PE(s) || VM86(s)) {
2481                goto illegal_op;
2482            }
2483        }
2484    }
2485
2486    switch (decode.e.special) {
2487    case X86_SPECIAL_None:
2488        break;
2489
2490    case X86_SPECIAL_Locked:
2491        if (decode.op[0].has_ea) {
2492            s->prefix |= PREFIX_LOCK;
2493        }
2494        decode.e.special = X86_SPECIAL_HasLock;
2495        /* fallthrough */
2496    case X86_SPECIAL_HasLock:
2497        break;
2498
2499    case X86_SPECIAL_Op0_Rd:
2500        assert(decode.op[0].unit == X86_OP_INT);
2501        if (!decode.op[0].has_ea) {
2502            decode.op[0].ot = MO_32;
2503        }
2504        break;
2505
2506    case X86_SPECIAL_Op2_Ry:
2507        assert(decode.op[2].unit == X86_OP_INT);
2508        if (!decode.op[2].has_ea) {
2509            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
2510        }
2511        break;
2512
2513    case X86_SPECIAL_AVXExtMov:
2514        if (!decode.op[2].has_ea) {
2515            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
2516        } else if (s->vex_l) {
2517            decode.op[2].ot++;
2518        }
2519        break;
2520
2521    case X86_SPECIAL_SExtT0:
2522    case X86_SPECIAL_ZExtT0:
2523        /* Handled in gen_load.  */
2524        assert(decode.op[1].unit == X86_OP_INT);
2525        break;
2526
2527    case X86_SPECIAL_NoSeg:
2528        decode.mem.def_seg = -1;
2529        s->override = -1;
2530        break;
2531
2532    case X86_SPECIAL_Op0_Mw:
2533        assert(decode.op[0].unit == X86_OP_INT);
2534        if (decode.op[0].has_ea) {
2535            decode.op[0].ot = MO_16;
2536        }
2537        break;
2538
2539    default:
2540        break;
2541    }
2542
2543    if (s->prefix & PREFIX_LOCK) {
2544        if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
2545            goto illegal_op;
2546        }
2547    }
2548
2549    if (!validate_vex(s, &decode)) {
2550        return;
2551    }
2552
2553    /*
2554     * Checks that result in #GP or VMEXIT come second.  Intercepts are
2555     * generally checked after non-memory exceptions (i.e. after all
2556     * exceptions if there is no memory operand).  Exceptions are
2557     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
2558     *
2559     * RSM and XSETBV will be handled in the gen_* functions
2560     * instead of using chk().
2561     */
2562    if (decode.e.check & X86_CHECK_cpl0) {
2563        if (CPL(s) != 0) {
2564            goto gp_fault;
2565        }
2566    }
2567    if (decode.e.intercept && unlikely(GUEST(s))) {
2568        gen_helper_svm_check_intercept(tcg_env,
2569                                       tcg_constant_i32(decode.e.intercept));
2570    }
2571    if (decode.e.check) {
2572        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
2573            if (IOPL(s) < 3) {
2574                goto gp_fault;
2575            }
2576        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
2577            if (IOPL(s) < CPL(s)) {
2578                goto gp_fault;
2579            }
2580        }
2581    }
2582
2583    if (decode.e.special == X86_SPECIAL_MMX &&
2584        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
2585        gen_helper_enter_mmx(tcg_env);
2586    }
2587
2588    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
2589        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
2590    }
2591    if (s->prefix & PREFIX_LOCK) {
2592        gen_load(s, &decode, 2, s->T1);
2593        decode.e.gen(s, env, &decode);
2594    } else {
2595        if (decode.op[0].unit == X86_OP_MMX) {
2596            compute_mmx_offset(&decode.op[0]);
2597        } else if (decode.op[0].unit == X86_OP_SSE) {
2598            compute_xmm_offset(&decode.op[0]);
2599        }
2600        gen_load(s, &decode, 1, s->T0);
2601        gen_load(s, &decode, 2, s->T1);
2602        decode.e.gen(s, env, &decode);
2603        gen_writeback(s, &decode, 0, s->T0);
2604    }
2605
2606    /*
2607     * Write back flags after last memory access.  Some newer ALU instructions, as
2608     * well as SSE instructions, write flags in the gen_* function, but that can
2609     * cause incorrect tracking of CC_OP for instructions that write to both memory
2610     * and flags.
2611     */
2612    if (decode.cc_op != -1) {
2613        if (decode.cc_dst) {
2614            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
2615        }
2616        if (decode.cc_src) {
2617            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
2618        }
2619        if (decode.cc_src2) {
2620            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
2621        }
2622        if (decode.cc_op == CC_OP_DYNAMIC) {
2623            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
2624        }
2625        set_cc_op(s, decode.cc_op);
2626        cc_live = cc_op_live[decode.cc_op];
2627    } else {
2628        cc_live = 0;
2629    }
2630    if (decode.cc_op != CC_OP_DYNAMIC) {
2631        assert(!decode.cc_op_dynamic);
2632        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
2633        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
2634        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
2635    }
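
    /*
     * Ordering example: for "lock addl $1, (%rbx)" both the addition and
     * the store happen in decode.e.gen() above, while the flags are only
     * staged in decode.cc_*.  They are committed here, after the memory
     * access, so a faulting store leaves guest flags and CC_OP untouched.
     */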
2636
2637    return;
2638 gp_fault:
2639    gen_exception_gpf(s);
2640    return;
2641 illegal_op:
2642    gen_illegal_opcode(s);
2643    return;
2644 unknown_op:
2645    gen_unknown_opcode(env, s);
2646}
2647