/*
 * New-style decoder for i386 instructions
 *
 * Copyright (c) 2022 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The decoder is mostly based on tables copied from the Intel SDM.  As
 * a result, most operand load and writeback is done entirely in common
 * table-driven code using the same operand type (X86_TYPE_*) and
 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
 * though.
 *
 * Operand sizes
 * -------------
 *
 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
 *
 * Vector operands
 * ---------------
 *
 * The main difference is that the V, U and W types are extended to
 * cover MMX as well; if an instruction is like
 *
 *      por   Pq, Qq
 *  66  por   Vx, Hx, Wx
 *
 * only the second row is included and the instruction is marked as a
 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
 * "x" to "q" if there is no prefix.
 *
 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
 * if the difference is expressed via prefixes.  Individual instructions
 * are separated by prefix in the generator functions.
 *
 * There is a custom size "xh" used to address half of a SSE/AVX operand.
 * This points to a 64-bit operand for SSE operations, 128-bit operand
 * for 256-bit AVX operands, etc.  It is used for conversion operations
 * such as VCVTPH2PS or VCVTSS2SD.
 *
 * There are a couple cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are described incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
 *
 * VEX exception classes
 * ---------------------
 *
 * Speaking about imprecisions in the manual, the decoder treats all
 * exception-class 4 instructions as having an optional VEX prefix, and
 * all exception-class 6 instructions as having a mandatory VEX prefix.
 * This is true except for a dozen instructions; these are in exception
 * class 4 but do not ignore the VEX.W bit (which does not even exist
 * without a VEX prefix).  These instructions are mostly listed in Intel's
 * table 2-16, but with a few exceptions.
 *
 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
 * they list the VEX.W requirements in the exception classes as well (except
 * when they don't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still use 4 as the
 * primary exception class... except when they don't.
 *
 * The summary is:
 *                       Intel     AMD         VEX.W           note
 * -------------------------------------------------------------------
 * vpblendd              4         4J          0
 * vpblendvb             4         4E-X        0               (*)
 * vpbroadcastq          6         6D          0               (+)
 * vpermd/vpermps        4         4H          0               (§)
 * vpermq/vpermpd        4         4H-1        1               (§)
 * vpermilpd/vpermilps   4         6E          0               (^)
 * vpmaskmovd            6         4K          significant     (^)
 * vpsllv                4         4K          significant
 * vpsrav                4         4J          0
 * vpsrlv                4         4K          significant
 * vtestps/vtestpd       4         4G          0
 *
 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
 *         explain why it is considered exception class 4.  However,
 *         Intel says that VEX-only instructions should be in class 6...
 *
 *    (+)  Not found in Intel's table 2-16
 *
 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
 *         however present in the description of the instruction
 *
 *    (^)  these are the two cases in which Intel and AMD disagree on the
 *         primary exception class
 */

/* Empty table slot.  Note that the trailing comma is part of the macro. */
#define X86_OP_NONE { 0 },

/*
 * Table entry that defers to a second-level decode_<op>() function
 * (.is_decode is set) and carries three operand type/size pairs.
 * Any extra arguments are appended verbatim as further designated
 * initializers; the flag shorthands below each end in a comma so that
 * they can be listed space-separated in that trailing argument.
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

/* Two-operand group: op1 becomes the "2op" type with the size of op0. */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Group with no operands described in the table. */
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

/*
 * Table entry that names the generator gen_<op>() directly and carries
 * three operand type/size pairs.  Extra arguments are appended verbatim
 * as further designated initializers.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

/* Like X86_OP_ENTRY3, plus a fixed fourth operand of type I and size b. */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/* Two-operand entry: op1 becomes the "2op" type with the size of op0. */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Entry that only describes op0 (op1 and op2 are None). */
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
/* Entry that only describes op2 (op0 and op1 are None). */
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
/* Entry with no operands described in the table. */
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)

/*
 * Flag shorthands for the trailing argument of the macros above.  Each
 * expands to one or more designated initializers followed by a comma.
 * All of the ".special" shorthands assign the same field, so only one
 * of them can be effective in any given entry.
 */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,

/* VEX exception class, optionally combined with a .vex_special modifier. */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

#define avx2_256 .vex_special = X86_VEX_AVX2_256,

/*
 * Bits for .valid_prefix: bit 0 stands for "no prefix", the others are
 * derived from the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

#define p_00          .valid_prefix = P_00,
189#define p_66 .valid_prefix = P_66, 190#define p_f3 .valid_prefix = P_F3, 191#define p_f2 .valid_prefix = P_F2, 192#define p_00_66 .valid_prefix = P_00 | P_66, 193#define p_00_f3 .valid_prefix = P_00 | P_F3, 194#define p_66_f2 .valid_prefix = P_66 | P_F2, 195#define p_00_66_f3 .valid_prefix = P_00 | P_66 | P_F3, 196#define p_66_f3_f2 .valid_prefix = P_66 | P_F3 | P_F2, 197#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2, 198 199static uint8_t get_modrm(DisasContext *s, CPUX86State *env) 200{ 201 if (!s->has_modrm) { 202 s->modrm = x86_ldub_code(env, s); 203 s->has_modrm = true; 204 } 205 return s->modrm; 206} 207 208static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4]) 209{ 210 if (s->prefix & PREFIX_REPNZ) { 211 return &entries[3]; 212 } else if (s->prefix & PREFIX_REPZ) { 213 return &entries[2]; 214 } else if (s->prefix & PREFIX_DATA) { 215 return &entries[1]; 216 } else { 217 return &entries[0]; 218 } 219} 220 221static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 222{ 223 /* only includes ldmxcsr and stmxcsr, because they have AVX variants. 
*/ 224 static const X86OpEntry group15_reg[8] = { 225 }; 226 227 static const X86OpEntry group15_mem[8] = { 228 [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)), 229 [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)), 230 }; 231 232 uint8_t modrm = get_modrm(s, env); 233 if ((modrm >> 6) == 3) { 234 *entry = group15_reg[(modrm >> 3) & 7]; 235 } else { 236 *entry = group15_mem[(modrm >> 3) & 7]; 237 } 238} 239 240static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 241{ 242 static const X86GenFunc group17_gen[8] = { 243 NULL, gen_BLSR, gen_BLSMSK, gen_BLSI, 244 }; 245 int op = (get_modrm(s, env) >> 3) & 7; 246 entry->gen = group17_gen[op]; 247} 248 249static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 250{ 251 static const X86OpEntry opcodes_group12[8] = { 252 {}, 253 {}, 254 X86_OP_ENTRY3(PSRLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 255 {}, 256 X86_OP_ENTRY3(PSRAW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 257 {}, 258 X86_OP_ENTRY3(PSLLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 259 {}, 260 }; 261 262 int op = (get_modrm(s, env) >> 3) & 7; 263 *entry = opcodes_group12[op]; 264} 265 266static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 267{ 268 static const X86OpEntry opcodes_group13[8] = { 269 {}, 270 {}, 271 X86_OP_ENTRY3(PSRLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 272 {}, 273 X86_OP_ENTRY3(PSRAD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 274 {}, 275 X86_OP_ENTRY3(PSLLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 276 {}, 277 }; 278 279 int op = (get_modrm(s, env) >> 3) & 7; 280 *entry = opcodes_group13[op]; 281} 282 283static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 284{ 285 static const X86OpEntry opcodes_group14[8] = { 286 /* grp14 */ 287 {}, 288 {}, 289 X86_OP_ENTRY3(PSRLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 290 X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 
avx2_256 p_66), 291 {}, 292 {}, 293 X86_OP_ENTRY3(PSLLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), 294 X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66), 295 }; 296 297 int op = (get_modrm(s, env) >> 3) & 7; 298 *entry = opcodes_group14[op]; 299} 300 301static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 302{ 303 static const X86OpEntry opcodes_0F6F[4] = { 304 X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex5 mmx), /* movq */ 305 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1), /* movdqa */ 306 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* movdqu */ 307 {}, 308 }; 309 *entry = *decode_by_prefix(s, opcodes_0F6F); 310} 311 312static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 313{ 314 static const X86OpEntry pshufw[4] = { 315 X86_OP_ENTRY3(PSHUFW, P,q, Q,q, I,b, vex4 mmx), 316 X86_OP_ENTRY3(PSHUFD, V,x, W,x, I,b, vex4 avx2_256), 317 X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256), 318 X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256), 319 }; 320 321 *entry = *decode_by_prefix(s, pshufw); 322} 323 324static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 325{ 326 if (!(s->prefix & PREFIX_VEX)) { 327 entry->gen = gen_EMMS; 328 } else if (!s->vex_l) { 329 entry->gen = gen_VZEROUPPER; 330 entry->vex_class = 8; 331 } else { 332 entry->gen = gen_VZEROALL; 333 entry->vex_class = 8; 334 } 335} 336 337static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 338{ 339 static const X86OpEntry opcodes_0F78[4] = { 340 {}, 341 X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)), /* AMD extension */ 342 {}, 343 X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)), /* AMD extension */ 344 }; 345 *entry = *decode_by_prefix(s, opcodes_0F78); 346} 347 348static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 349{ 350 if (s->prefix & PREFIX_REPNZ) { 351 entry->gen = 
gen_INSERTQ_r; /* AMD extension */ 352 } else if (s->prefix & PREFIX_DATA) { 353 entry->gen = gen_EXTRQ_r; /* AMD extension */ 354 } else { 355 entry->gen = NULL; 356 }; 357} 358 359static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 360{ 361 static const X86OpEntry opcodes_0F7E[4] = { 362 X86_OP_ENTRY3(MOVD_from, E,y, None,None, P,y, vex5 mmx), 363 X86_OP_ENTRY3(MOVD_from, E,y, None,None, V,y, vex5), 364 X86_OP_ENTRY3(MOVQ, V,x, None,None, W,q, vex5), /* wrong dest Vy on SDM! */ 365 {}, 366 }; 367 *entry = *decode_by_prefix(s, opcodes_0F7E); 368} 369 370static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 371{ 372 static const X86OpEntry opcodes_0F7F[4] = { 373 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex5 mmx), /* movq */ 374 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1), /* movdqa */ 375 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4_unal), /* movdqu */ 376 {}, 377 }; 378 *entry = *decode_by_prefix(s, opcodes_0F7F); 379} 380 381static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 382{ 383 static const X86OpEntry movq[4] = { 384 {}, 385 X86_OP_ENTRY3(MOVQ, W,x, None, None, V,q, vex5), 386 X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q), 387 X86_OP_ENTRY3(MOVq_dq, P,q, None, None, U,q), 388 }; 389 390 *entry = *decode_by_prefix(s, movq); 391} 392 393static const X86OpEntry opcodes_0F38_00toEF[240] = { 394 [0x00] = X86_OP_ENTRY3(PSHUFB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 395 [0x01] = X86_OP_ENTRY3(PHADDW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 396 [0x02] = X86_OP_ENTRY3(PHADDD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 397 [0x03] = X86_OP_ENTRY3(PHADDSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 398 [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 399 [0x05] = X86_OP_ENTRY3(PHSUBW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 400 [0x06] = 
X86_OP_ENTRY3(PHSUBD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 401 [0x07] = X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 402 403 [0x10] = X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 404 [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,xh, vex11 chk(W0) cpuid(F16C) p_66), 405 [0x14] = X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE41) p_66), 406 [0x15] = X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE41) p_66), 407 /* Listed incorrectly as type 4 */ 408 [0x16] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */ 409 [0x17] = X86_OP_ENTRY3(VPTEST, None,None, V,x, W,x, vex4 cpuid(SSE41) p_66), 410 411 /* 412 * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed 413 * as 128-bit only in 2-17. 414 */ 415 [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 416 [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 417 [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 418 [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 419 [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 420 [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 421 422 /* Same as PMOVSX. 
*/ 423 [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 424 [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 425 [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 426 [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 427 [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 428 [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), 429 [0x36] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), 430 [0x37] = X86_OP_ENTRY3(PCMPGTQ, V,x, H,x, W,x, vex4 cpuid(SSE42) avx2_256 p_66), 431 432 [0x40] = X86_OP_ENTRY3(PMULLD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 433 [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66), 434 /* Listed incorrectly as type 4 */ 435 [0x45] = X86_OP_ENTRY3(VPSRLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66), 436 [0x46] = X86_OP_ENTRY3(VPSRAV, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66), 437 [0x47] = X86_OP_ENTRY3(VPSLLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66), 438 439 [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */ 440 [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */ 441 [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vgatherdps/d */ 442 [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vgatherqps/d */ 443 444 /* Should be exception type 2 but they do not have legacy SSE equivalents? 
*/ 445 [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 446 [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 447 448 [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 449 [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 450 451 [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 452 [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 453 454 [0x08] = X86_OP_ENTRY3(PSIGNB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 455 [0x09] = X86_OP_ENTRY3(PSIGNW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 456 [0x0a] = X86_OP_ENTRY3(PSIGND, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 457 [0x0b] = X86_OP_ENTRY3(PMULHRSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 458 /* Listed incorrectly as type 4 */ 459 [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_00_66), 460 [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), 461 [0x0e] = X86_OP_ENTRY3(VTESTPS, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), 462 [0x0f] = X86_OP_ENTRY3(VTESTPD, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), 463 464 [0x18] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */ 465 [0x19] = X86_OP_ENTRY3(VPBROADCASTQ, V,qq, None,None, W,q, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */ 466 [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66), 467 [0x1c] = X86_OP_ENTRY3(PABSB, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 468 [0x1d] = X86_OP_ENTRY3(PABSW, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 469 [0x1e] = X86_OP_ENTRY3(PABSD, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 470 471 [0x28] = X86_OP_ENTRY3(PMULDQ, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 472 [0x29] = X86_OP_ENTRY3(PCMPEQQ, V,x, 
H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 473 [0x2a] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */ 474 [0x2b] = X86_OP_ENTRY3(VPACKUSDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 475 [0x2c] = X86_OP_ENTRY3(VMASKMOVPS, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66), 476 [0x2d] = X86_OP_ENTRY3(VMASKMOVPD, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66), 477 /* Incorrectly listed as Mx,Hx,Vx in the manual */ 478 [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66), 479 [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66), 480 481 [0x38] = X86_OP_ENTRY3(PMINSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 482 [0x39] = X86_OP_ENTRY3(PMINSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 483 [0x3a] = X86_OP_ENTRY3(PMINUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 484 [0x3b] = X86_OP_ENTRY3(PMINUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 485 [0x3c] = X86_OP_ENTRY3(PMAXSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 486 [0x3d] = X86_OP_ENTRY3(PMAXSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 487 [0x3e] = X86_OP_ENTRY3(PMAXUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 488 [0x3f] = X86_OP_ENTRY3(PMAXUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 489 490 /* VPBROADCASTQ not listed as W0 in table 2-16 */ 491 [0x58] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX2) p_66), 492 [0x59] = X86_OP_ENTRY3(VPBROADCASTQ, V,x, None,None, W,q, vex6 chk(W0) cpuid(AVX2) p_66), 493 [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66), 494 495 [0x78] = X86_OP_ENTRY3(VPBROADCASTB, V,x, None,None, W,b, vex6 chk(W0) cpuid(AVX2) p_66), 496 [0x79] = X86_OP_ENTRY3(VPBROADCASTW, V,x, None,None, W,w, vex6 chk(W0) cpuid(AVX2) p_66), 497 498 [0x8c] = X86_OP_ENTRY3(VPMASKMOV, V,x, H,x, WM,x, vex6 cpuid(AVX2) p_66), 499 [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x, V,x, H,x, vex6 
cpuid(AVX2) p_66), 500 501 /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */ 502 [0x98] = X86_OP_ENTRY3(VFMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 503 [0x99] = X86_OP_ENTRY3(VFMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 504 [0x9a] = X86_OP_ENTRY3(VFMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 505 [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 506 [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 507 [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 508 [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 509 [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 510 511 [0xa8] = X86_OP_ENTRY3(VFMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 512 [0xa9] = X86_OP_ENTRY3(VFMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 513 [0xaa] = X86_OP_ENTRY3(VFMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 514 [0xab] = X86_OP_ENTRY3(VFMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 515 [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 516 [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 517 [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 518 [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 519 520 [0xb8] = X86_OP_ENTRY3(VFMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 521 [0xb9] = X86_OP_ENTRY3(VFMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 522 [0xba] = X86_OP_ENTRY3(VFMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 523 [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 524 [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 525 [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 526 [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), 527 [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) 
p_66), 528 529 [0xc8] = X86_OP_ENTRY2(SHA1NEXTE, V,dq, W,dq, cpuid(SHA_NI)), 530 [0xc9] = X86_OP_ENTRY2(SHA1MSG1, V,dq, W,dq, cpuid(SHA_NI)), 531 [0xca] = X86_OP_ENTRY2(SHA1MSG2, V,dq, W,dq, cpuid(SHA_NI)), 532 [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)), 533 [0xcc] = X86_OP_ENTRY2(SHA256MSG1, V,dq, W,dq, cpuid(SHA_NI)), 534 [0xcd] = X86_OP_ENTRY2(SHA256MSG2, V,dq, W,dq, cpuid(SHA_NI)), 535 536 [0xdb] = X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpuid(AES) p_66), 537 [0xdc] = X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), 538 [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), 539 [0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), 540 [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), 541 542 /* 543 * REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found 544 * in manual, assumed to be 13 from the VEX.L0 constraint. 545 */ 546 [0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 547 [0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 548 [0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 549 [0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 550 [0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 551 [0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 552 [0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 553 [0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 554 555 [0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 556 [0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 557 [0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg 
chk(o64) cpuid(CMPCCXADD) p_66), 558 [0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 559 [0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 560 [0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 561 [0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 562 [0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), 563}; 564 565/* five rows for no prefix, 66, F3, F2, 66+F2 */ 566static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { 567 [0] = { 568 X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)), 569 X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)), 570 {}, 571 X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), 572 X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), 573 }, 574 [1] = { 575 X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)), 576 X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)), 577 {}, 578 X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)), 579 X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)), 580 }, 581 [2] = { 582 X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)), 583 {}, 584 {}, 585 {}, 586 {}, 587 }, 588 [3] = { 589 X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)), 590 {}, 591 {}, 592 {}, 593 {}, 594 }, 595 [5] = { 596 X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)), 597 {}, 598 X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), 599 X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), 600 {}, 601 }, 602 [6] = { 603 {}, 604 X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)), 605 X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)), 606 X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)), 607 {}, 608 }, 609 [7] = { 610 X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), 611 X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)), 612 X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)), 613 
X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), 614 {}, 615 }, 616}; 617 618static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 619{ 620 *b = x86_ldub_code(env, s); 621 if (*b < 0xf0) { 622 *entry = opcodes_0F38_00toEF[*b]; 623 } else { 624 int row = 0; 625 if (s->prefix & PREFIX_REPZ) { 626 /* The REPZ (F3) prefix has priority over 66 */ 627 row = 2; 628 } else { 629 row += s->prefix & PREFIX_REPNZ ? 3 : 0; 630 row += s->prefix & PREFIX_DATA ? 1 : 0; 631 } 632 *entry = opcodes_0F38_F0toFF[*b & 15][row]; 633 } 634} 635 636static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 637{ 638 static const X86OpEntry 639 vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66), 640 vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d, vex5 cpuid(SSE41) p_66); 641 642 int modrm = get_modrm(s, env); 643 *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem; 644} 645 646static const X86OpEntry opcodes_0F3A[256] = { 647 /* 648 * These are VEX-only, but incorrectly listed in the manual as exception type 4. 649 * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256 650 * only. 
651 */ 652 [0x00] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66), 653 [0x01] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */ 654 [0x02] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */ 655 [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), 656 [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), 657 [0x06] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), 658 659 [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), 660 [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), 661 [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66), 662 [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66), 663 [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 chk(W0) cpuid(F16C) p_66), 664 665 [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) op2_Ry p_66), 666 [0x21] = X86_OP_GROUP0(VINSERTPS), 667 [0x22] = X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE41) p_66), 668 669 [0x40] = X86_OP_ENTRY4(VDDPS, V,x, H,x, W,x, vex2 cpuid(SSE41) p_66), 670 [0x41] = X86_OP_ENTRY4(VDDPD, V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66), 671 [0x42] = X86_OP_ENTRY4(VMPSADBW, V,x, H,x, W,x, vex2 cpuid(SSE41) avx2_256 p_66), 672 [0x44] = X86_OP_ENTRY4(PCLMULQDQ, V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66), 673 [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), 674 675 [0x60] = X86_OP_ENTRY4(PCMPESTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), 676 [0x61] = X86_OP_ENTRY4(PCMPESTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), 677 [0x62] = X86_OP_ENTRY4(PCMPISTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), 678 [0x63] = X86_OP_ENTRY4(PCMPISTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), 679 680 [0x08] = X86_OP_ENTRY3(VROUNDPS, V,x, W,x, 
I,b, vex2 cpuid(SSE41) p_66), 681 [0x09] = X86_OP_ENTRY3(VROUNDPD, V,x, W,x, I,b, vex2 cpuid(SSE41) p_66), 682 /* 683 * Not listed as four operand in the manual. Also writes and reads 128-bits 684 * from the first two operands due to the V operand picking higher entries of 685 * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect. 686 * For other unary operations such as VSQRTSx this is hidden by the "REPScalar" 687 * value of vex_special, because the table lists the operand types of VSQRTPx. 688 */ 689 [0x0a] = X86_OP_ENTRY4(VROUNDSS, V,x, H,x, W,ss, vex3 cpuid(SSE41) p_66), 690 [0x0b] = X86_OP_ENTRY4(VROUNDSD, V,x, H,x, W,sd, vex3 cpuid(SSE41) p_66), 691 [0x0c] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66), 692 [0x0d] = X86_OP_ENTRY4(VBLENDPD, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66), 693 [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), 694 [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), 695 696 [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), 697 [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), 698 699 [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), 700 [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), 701 702 /* Listed incorrectly as type 4 */ 703 [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), 704 [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), 705 [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66 avx2_256), 706 707 [0xcc] = X86_OP_ENTRY3(SHA1RNDS4, V,dq, W,dq, I,b, cpuid(SHA_NI)), 708 709 [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b, vex4 cpuid(AES) p_66), 710 711 [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2), 712}; 713 714static void decode_0F3A(DisasContext *s, CPUX86State *env, 
X86OpEntry *entry, uint8_t *b) 715{ 716 *b = x86_ldub_code(env, s); 717 *entry = opcodes_0F3A[*b]; 718} 719 720/* 721 * There are some mistakes in the operands in the manual, and the load/store/register 722 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and 723 * efficiency of implementation rather than copying what the manual says. 724 * 725 * In particular: 726 * 727 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b, 728 * but this is not mentioned in the tables. 729 * 730 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their 731 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high 732 * quadword of the V operand. 733 */ 734static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 735{ 736 static const X86OpEntry opcodes_0F10_reg[4] = { 737 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */ 738 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */ 739 X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex5), 740 X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex5), /* MOVSD */ 741 }; 742 743 static const X86OpEntry opcodes_0F10_mem[4] = { 744 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */ 745 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */ 746 X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex5), 747 X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex5), 748 }; 749 750 if ((get_modrm(s, env) >> 6) == 3) { 751 *entry = *decode_by_prefix(s, opcodes_0F10_reg); 752 } else { 753 *entry = *decode_by_prefix(s, opcodes_0F10_mem); 754 } 755} 756 757static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 758{ 759 static const X86OpEntry opcodes_0F11_reg[4] = { 760 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */ 761 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */ 762 X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex5), 763 
X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex5), /* MOVSD */ 764 }; 765 766 static const X86OpEntry opcodes_0F11_mem[4] = { 767 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */ 768 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */ 769 X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex5), 770 X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */ 771 }; 772 773 if ((get_modrm(s, env) >> 6) == 3) { 774 *entry = *decode_by_prefix(s, opcodes_0F11_reg); 775 } else { 776 *entry = *decode_by_prefix(s, opcodes_0F11_mem); 777 } 778} 779 780static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 781{ 782 static const X86OpEntry opcodes_0F12_mem[4] = { 783 /* 784 * Use dq for operand for compatibility with gen_MOVSD and 785 * to allow VEX128 only. 786 */ 787 X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPS */ 788 X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPD */ 789 X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), 790 X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */ 791 }; 792 static const X86OpEntry opcodes_0F12_reg[4] = { 793 X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex7), 794 X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex5), /* MOVLPD */ 795 X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), 796 X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex5 cpuid(SSE3)), 797 }; 798 799 if ((get_modrm(s, env) >> 6) == 3) { 800 *entry = *decode_by_prefix(s, opcodes_0F12_reg); 801 } else { 802 *entry = *decode_by_prefix(s, opcodes_0F12_mem); 803 if ((s->prefix & PREFIX_REPNZ) && s->vex_l) { 804 entry->s2 = X86_SIZE_qq; 805 } 806 } 807} 808 809static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 810{ 811 static const X86OpEntry opcodes_0F16_mem[4] = { 812 /* 813 * Operand 1 technically only reads the low 64 bits, but uses dq so that 814 * it is easier to check for op0 == op1 in an endianness-neutral 
manner. 815 */ 816 X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPS */ 817 X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPD */ 818 X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), 819 {}, 820 }; 821 static const X86OpEntry opcodes_0F16_reg[4] = { 822 /* Same as above, operand 1 could be Hq if it wasn't for big-endian. */ 823 X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex7), 824 X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex5), /* MOVHPD */ 825 X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), 826 {}, 827 }; 828 829 if ((get_modrm(s, env) >> 6) == 3) { 830 *entry = *decode_by_prefix(s, opcodes_0F16_reg); 831 } else { 832 *entry = *decode_by_prefix(s, opcodes_0F16_mem); 833 } 834} 835 836static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 837{ 838 static const X86OpEntry opcodes_0F2A[4] = { 839 X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), 840 X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), 841 X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), 842 X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), 843 }; 844 *entry = *decode_by_prefix(s, opcodes_0F2A); 845} 846 847static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 848{ 849 static const X86OpEntry opcodes_0F2B[4] = { 850 X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPS */ 851 X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPD */ 852 /* AMD extensions */ 853 X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */ 854 X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */ 855 }; 856 857 *entry = *decode_by_prefix(s, opcodes_0F2B); 858} 859 860static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 861{ 862 static const X86OpEntry opcodes_0F2C[4] = { 863 /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit. 
*/ 864 X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,q), 865 X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,dq), 866 X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,ss, vex3), 867 X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,sd, vex3), 868 }; 869 *entry = *decode_by_prefix(s, opcodes_0F2C); 870} 871 872static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 873{ 874 static const X86OpEntry opcodes_0F2D[4] = { 875 /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit. */ 876 X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,q), 877 X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,dq), 878 X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,ss, vex3), 879 X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,sd, vex3), 880 }; 881 *entry = *decode_by_prefix(s, opcodes_0F2D); 882} 883 884static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 885{ 886 /* 887 * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD 888 * respectively. Scalar values usually are associated with 0xF2 and 0xF3, for 889 * which X86_VEX_REPScalar exists, but here it has to be decoded by hand. 890 */ 891 entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss); 892 entry->gen = (*b == 0x2E ? 
gen_VUCOMI : gen_VCOMI); 893} 894 895static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 896{ 897 if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) { 898 entry->op1 = X86_TYPE_None; 899 entry->s1 = X86_SIZE_None; 900 } 901 switch (*b) { 902 case 0x51: entry->gen = gen_VSQRT; break; 903 case 0x52: entry->gen = gen_VRSQRT; break; 904 case 0x53: entry->gen = gen_VRCP; break; 905 } 906} 907 908static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 909{ 910 static const X86OpEntry opcodes_0F5A[4] = { 911 X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */ 912 X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */ 913 X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */ 914 X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */ 915 }; 916 *entry = *decode_by_prefix(s, opcodes_0F5A); 917} 918 919static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 920{ 921 static const X86OpEntry opcodes_0F5B[4] = { 922 X86_OP_ENTRY2(VCVTDQ2PS, V,x, W,x, vex2), 923 X86_OP_ENTRY2(VCVTPS2DQ, V,x, W,x, vex2), 924 X86_OP_ENTRY2(VCVTTPS2DQ, V,x, W,x, vex2), 925 {}, 926 }; 927 *entry = *decode_by_prefix(s, opcodes_0F5B); 928} 929 930static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 931{ 932 static const X86OpEntry opcodes_0FE6[4] = { 933 {}, 934 X86_OP_ENTRY2(VCVTTPD2DQ, V,x, W,x, vex2), 935 X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex5), 936 X86_OP_ENTRY2(VCVTPD2DQ, V,x, W,x, vex2), 937 }; 938 *entry = *decode_by_prefix(s, opcodes_0FE6); 939} 940 941static const X86OpEntry opcodes_0F[256] = { 942 [0x0E] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */ 943 /* 944 * 3DNow!'s opcode byte comes *after* modrm and displacements, making it 945 * more like an Ib operand. Dispatch to the right helper in a single gen_* 946 * function. 
947 */ 948 [0x0F] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)), 949 950 [0x10] = X86_OP_GROUP0(0F10), 951 [0x11] = X86_OP_GROUP0(0F11), 952 [0x12] = X86_OP_GROUP0(0F12), 953 [0x13] = X86_OP_ENTRY3(VMOVLPx_st, M,q, None,None, V,q, vex5 p_00_66), 954 [0x14] = X86_OP_ENTRY3(VUNPCKLPx, V,x, H,x, W,x, vex4 p_00_66), 955 [0x15] = X86_OP_ENTRY3(VUNPCKHPx, V,x, H,x, W,x, vex4 p_00_66), 956 [0x16] = X86_OP_GROUP0(0F16), 957 /* Incorrectly listed as Mq,Vq in the manual */ 958 [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66), 959 960 [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), 961 [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */ 962 [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */ 963 [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */ 964 [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ 965 [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ 966 [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ 967 [0x57] = X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 p_00_66), /* vxor */ 968 969 [0x60] = X86_OP_ENTRY3(PUNPCKLBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 970 [0x61] = X86_OP_ENTRY3(PUNPCKLWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 971 [0x62] = X86_OP_ENTRY3(PUNPCKLDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 972 [0x63] = X86_OP_ENTRY3(PACKSSWB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 973 [0x64] = X86_OP_ENTRY3(PCMPGTB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 974 [0x65] = X86_OP_ENTRY3(PCMPGTW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 975 [0x66] = X86_OP_ENTRY3(PCMPGTD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 976 [0x67] = X86_OP_ENTRY3(PACKUSWB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 977 978 [0x70] = X86_OP_GROUP0(0F70), 979 [0x71] = X86_OP_GROUP0(group12), 980 [0x72] = X86_OP_GROUP0(group13), 981 [0x73] = X86_OP_GROUP0(group14), 982 [0x74] = 
X86_OP_ENTRY3(PCMPEQB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 983 [0x75] = X86_OP_ENTRY3(PCMPEQW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 984 [0x76] = X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 985 [0x77] = X86_OP_GROUP0(0F77), 986 987 [0x28] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_66), /* MOVAPS */ 988 [0x29] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_66), /* MOVAPS */ 989 [0x2A] = X86_OP_GROUP0(0F2A), 990 [0x2B] = X86_OP_GROUP0(0F2B), 991 [0x2C] = X86_OP_GROUP0(0F2C), 992 [0x2D] = X86_OP_GROUP0(0F2D), 993 [0x2E] = X86_OP_GROUP3(VxCOMISx, None,None, V,x, W,x, vex3 p_00_66), /* VUCOMISS/SD */ 994 [0x2F] = X86_OP_GROUP3(VxCOMISx, None,None, V,x, W,x, vex3 p_00_66), /* VCOMISS/SD */ 995 996 [0x38] = X86_OP_GROUP0(0F38), 997 [0x3a] = X86_OP_GROUP0(0F3A), 998 999 [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1000 [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1001 [0x5a] = X86_OP_GROUP0(0F5A), 1002 [0x5b] = X86_OP_GROUP0(0F5B), 1003 [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1004 [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1005 [0x5e] = X86_OP_ENTRY3(VDIV, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1006 [0x5f] = X86_OP_ENTRY3(VMAX, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1007 1008 [0x68] = X86_OP_ENTRY3(PUNPCKHBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1009 [0x69] = X86_OP_ENTRY3(PUNPCKHWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1010 [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1011 [0x6b] = X86_OP_ENTRY3(PACKSSDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1012 [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x, vex4 p_66 avx2_256), 1013 [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x, vex4 p_66 avx2_256), 1014 [0x6e] = X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_00_66), /* wrong dest Vy on SDM! 
*/ 1015 [0x6f] = X86_OP_GROUP0(0F6F), 1016 1017 [0x78] = X86_OP_GROUP0(0F78), 1018 [0x79] = X86_OP_GROUP2(0F79, V,x, U,x, cpuid(SSE4A)), 1019 [0x7c] = X86_OP_ENTRY3(VHADD, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), 1020 [0x7d] = X86_OP_ENTRY3(VHSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), 1021 [0x7e] = X86_OP_GROUP0(0F7E), 1022 [0x7f] = X86_OP_GROUP0(0F7F), 1023 1024 [0xae] = X86_OP_GROUP0(group15), 1025 1026 [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), 1027 [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), 1028 [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), 1029 [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), 1030 1031 [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), 1032 [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1033 [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1034 [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1035 [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1036 [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1037 [0xd6] = X86_OP_GROUP0(0FD6), 1038 [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66), 1039 1040 [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1041 [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), 1042 [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), 1043 [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1044 [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1045 [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1046 [0xe6] = X86_OP_GROUP0(0FE6), 1047 [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */ 1048 1049 [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* 
LDDQU */ 1050 [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), 1051 [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), 1052 [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), 1053 [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1054 [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1055 [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1056 [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66), 1057 1058 /* Incorrectly missing from 2-17 */ 1059 [0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1060 [0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1061 [0xda] = X86_OP_ENTRY3(PMINUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1062 [0xdb] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1063 [0xdc] = X86_OP_ENTRY3(PADDUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1064 [0xdd] = X86_OP_ENTRY3(PADDUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1065 [0xde] = X86_OP_ENTRY3(PMAXUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1066 [0xdf] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1067 1068 [0xe8] = X86_OP_ENTRY3(PSUBSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1069 [0xe9] = X86_OP_ENTRY3(PSUBSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1070 [0xea] = X86_OP_ENTRY3(PMINSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1071 [0xeb] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1072 [0xec] = X86_OP_ENTRY3(PADDSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1073 [0xed] = X86_OP_ENTRY3(PADDSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1074 [0xee] = X86_OP_ENTRY3(PMAXSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1075 [0xef] = X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1076 1077 [0xf8] = X86_OP_ENTRY3(PSUBB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1078 [0xf9] = 
X86_OP_ENTRY3(PSUBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1079 [0xfa] = X86_OP_ENTRY3(PSUBD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1080 [0xfb] = X86_OP_ENTRY3(PSUBQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1081 [0xfc] = X86_OP_ENTRY3(PADDB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1082 [0xfd] = X86_OP_ENTRY3(PADDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1083 [0xfe] = X86_OP_ENTRY3(PADDD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), 1084 /* 0xff = UD0 */ 1085}; 1086 1087static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 1088{ 1089 *entry = opcodes_0F[*b]; 1090} 1091 1092static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 1093{ 1094 *b = x86_ldub_code(env, s); 1095 do_decode_0F(s, env, entry, b); 1096} 1097 1098static const X86OpEntry opcodes_root[256] = { 1099 [0x0F] = X86_OP_GROUP0(0F), 1100}; 1101 1102#undef mmx 1103#undef vex1 1104#undef vex2 1105#undef vex3 1106#undef vex4 1107#undef vex4_unal 1108#undef vex5 1109#undef vex6 1110#undef vex7 1111#undef vex8 1112#undef vex11 1113#undef vex12 1114#undef vex13 1115 1116/* 1117 * Decode the fixed part of the opcode and place the last 1118 * in b. 
1119 */ 1120static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) 1121{ 1122 *entry = opcodes_root[*b]; 1123} 1124 1125 1126static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, 1127 X86DecodedOp *op, X86OpType type) 1128{ 1129 int modrm = get_modrm(s, env); 1130 if ((modrm >> 6) == 3) { 1131 op->n = (modrm & 7); 1132 if (type != X86_TYPE_Q && type != X86_TYPE_N) { 1133 op->n |= REX_B(s); 1134 } 1135 } else { 1136 op->has_ea = true; 1137 op->n = -1; 1138 decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env)); 1139 } 1140 return modrm; 1141} 1142 1143static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot) 1144{ 1145 switch (size) { 1146 case X86_SIZE_b: /* byte */ 1147 *ot = MO_8; 1148 return true; 1149 1150 case X86_SIZE_d: /* 32-bit */ 1151 case X86_SIZE_ss: /* SSE/AVX scalar single precision */ 1152 *ot = MO_32; 1153 return true; 1154 1155 case X86_SIZE_p: /* Far pointer, return offset size */ 1156 case X86_SIZE_s: /* Descriptor, return offset size */ 1157 case X86_SIZE_v: /* 16/32/64-bit, based on operand size */ 1158 *ot = s->dflag; 1159 return true; 1160 1161 case X86_SIZE_pi: /* MMX */ 1162 case X86_SIZE_q: /* 64-bit */ 1163 case X86_SIZE_sd: /* SSE/AVX scalar double precision */ 1164 *ot = MO_64; 1165 return true; 1166 1167 case X86_SIZE_w: /* 16-bit */ 1168 *ot = MO_16; 1169 return true; 1170 1171 case X86_SIZE_y: /* 32/64-bit, based on operand size */ 1172 *ot = s->dflag == MO_16 ? MO_32 : s->dflag; 1173 return true; 1174 1175 case X86_SIZE_z: /* 16-bit for 16-bit operand size, else 32-bit */ 1176 *ot = s->dflag == MO_16 ? 
MO_16 : MO_32; 1177 return true; 1178 1179 case X86_SIZE_dq: /* SSE/AVX 128-bit */ 1180 if (e->special == X86_SPECIAL_MMX && 1181 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { 1182 *ot = MO_64; 1183 return true; 1184 } 1185 if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) { 1186 return false; 1187 } 1188 *ot = MO_128; 1189 return true; 1190 1191 case X86_SIZE_qq: /* AVX 256-bit */ 1192 if (!s->vex_l) { 1193 return false; 1194 } 1195 *ot = MO_256; 1196 return true; 1197 1198 case X86_SIZE_x: /* 128/256-bit, based on operand size */ 1199 if (e->special == X86_SPECIAL_MMX && 1200 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { 1201 *ot = MO_64; 1202 return true; 1203 } 1204 /* fall through */ 1205 case X86_SIZE_ps: /* SSE/AVX packed single precision */ 1206 case X86_SIZE_pd: /* SSE/AVX packed double precision */ 1207 *ot = s->vex_l ? MO_256 : MO_128; 1208 return true; 1209 1210 case X86_SIZE_xh: /* SSE/AVX packed half register */ 1211 *ot = s->vex_l ? MO_128 : MO_64; 1212 return true; 1213 1214 case X86_SIZE_d64: /* Default to 64-bit in 64-bit mode */ 1215 *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag; 1216 return true; 1217 1218 case X86_SIZE_f64: /* Ignore size override prefix in 64-bit mode */ 1219 *ot = CODE64(s) ? 
MO_64 : s->dflag; 1220 return true; 1221 1222 default: 1223 *ot = -1; 1224 return true; 1225 } 1226} 1227 1228static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, 1229 X86DecodedOp *op, X86OpType type, int b) 1230{ 1231 int modrm; 1232 1233 switch (type) { 1234 case X86_TYPE_None: /* Implicit or absent */ 1235 case X86_TYPE_A: /* Implicit */ 1236 case X86_TYPE_F: /* EFLAGS/RFLAGS */ 1237 case X86_TYPE_X: /* string source */ 1238 case X86_TYPE_Y: /* string destination */ 1239 break; 1240 1241 case X86_TYPE_B: /* VEX.vvvv selects a GPR */ 1242 op->unit = X86_OP_INT; 1243 op->n = s->vex_v; 1244 break; 1245 1246 case X86_TYPE_C: /* REG in the modrm byte selects a control register */ 1247 op->unit = X86_OP_CR; 1248 goto get_reg; 1249 1250 case X86_TYPE_D: /* REG in the modrm byte selects a debug register */ 1251 op->unit = X86_OP_DR; 1252 goto get_reg; 1253 1254 case X86_TYPE_G: /* REG in the modrm byte selects a GPR */ 1255 op->unit = X86_OP_INT; 1256 goto get_reg; 1257 1258 case X86_TYPE_S: /* reg selects a segment register */ 1259 op->unit = X86_OP_SEG; 1260 goto get_reg; 1261 1262 case X86_TYPE_P: 1263 op->unit = X86_OP_MMX; 1264 goto get_reg; 1265 1266 case X86_TYPE_V: /* reg in the modrm byte selects an XMM/YMM register */ 1267 if (decode->e.special == X86_SPECIAL_MMX && 1268 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { 1269 op->unit = X86_OP_MMX; 1270 } else { 1271 op->unit = X86_OP_SSE; 1272 } 1273 get_reg: 1274 op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s); 1275 break; 1276 1277 case X86_TYPE_E: /* ALU modrm operand */ 1278 op->unit = X86_OP_INT; 1279 goto get_modrm; 1280 1281 case X86_TYPE_Q: /* MMX modrm operand */ 1282 op->unit = X86_OP_MMX; 1283 goto get_modrm; 1284 1285 case X86_TYPE_W: /* XMM/YMM modrm operand */ 1286 if (decode->e.special == X86_SPECIAL_MMX && 1287 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { 1288 op->unit = X86_OP_MMX; 1289 } else { 1290 op->unit = X86_OP_SSE; 1291 } 1292 
goto get_modrm; 1293 1294 case X86_TYPE_N: /* R/M in the modrm byte selects an MMX register */ 1295 op->unit = X86_OP_MMX; 1296 goto get_modrm_reg; 1297 1298 case X86_TYPE_U: /* R/M in the modrm byte selects an XMM/YMM register */ 1299 if (decode->e.special == X86_SPECIAL_MMX && 1300 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { 1301 op->unit = X86_OP_MMX; 1302 } else { 1303 op->unit = X86_OP_SSE; 1304 } 1305 goto get_modrm_reg; 1306 1307 case X86_TYPE_R: /* R/M in the modrm byte selects a register */ 1308 op->unit = X86_OP_INT; 1309 get_modrm_reg: 1310 modrm = get_modrm(s, env); 1311 if ((modrm >> 6) != 3) { 1312 return false; 1313 } 1314 goto get_modrm; 1315 1316 case X86_TYPE_WM: /* modrm byte selects an XMM/YMM memory operand */ 1317 op->unit = X86_OP_SSE; 1318 /* fall through */ 1319 case X86_TYPE_M: /* modrm byte selects a memory operand */ 1320 modrm = get_modrm(s, env); 1321 if ((modrm >> 6) == 3) { 1322 return false; 1323 } 1324 get_modrm: 1325 decode_modrm(s, env, decode, op, type); 1326 break; 1327 1328 case X86_TYPE_O: /* Absolute address encoded in the instruction */ 1329 op->unit = X86_OP_INT; 1330 op->has_ea = true; 1331 op->n = -1; 1332 decode->mem = (AddressParts) { 1333 .def_seg = R_DS, 1334 .base = -1, 1335 .index = -1, 1336 .disp = insn_get_addr(env, s, s->aflag) 1337 }; 1338 break; 1339 1340 case X86_TYPE_H: /* For AVX, VEX.vvvv selects an XMM/YMM register */ 1341 if ((s->prefix & PREFIX_VEX)) { 1342 op->unit = X86_OP_SSE; 1343 op->n = s->vex_v; 1344 break; 1345 } 1346 if (op == &decode->op[0]) { 1347 /* shifts place the destination in VEX.vvvv, use modrm */ 1348 return decode_op(s, env, decode, op, decode->e.op1, b); 1349 } else { 1350 return decode_op(s, env, decode, op, decode->e.op0, b); 1351 } 1352 1353 case X86_TYPE_I: /* Immediate */ 1354 case X86_TYPE_J: /* Relative offset for a jump */ 1355 op->unit = X86_OP_IMM; 1356 decode->immediate = insn_get_signed(env, s, op->ot); 1357 break; 1358 1359 case X86_TYPE_L: /* The upper 
4 bits of the immediate select a 128-bit register */ 1360 op->n = insn_get(env, s, op->ot) >> 4; 1361 break; 1362 1363 case X86_TYPE_2op: 1364 *op = decode->op[0]; 1365 break; 1366 1367 case X86_TYPE_LoBits: 1368 op->n = (b & 7) | REX_B(s); 1369 op->unit = X86_OP_INT; 1370 break; 1371 1372 case X86_TYPE_0 ... X86_TYPE_7: 1373 op->n = type - X86_TYPE_0; 1374 op->unit = X86_OP_INT; 1375 break; 1376 1377 case X86_TYPE_ES ... X86_TYPE_GS: 1378 op->n = type - X86_TYPE_ES; 1379 op->unit = X86_OP_SEG; 1380 break; 1381 } 1382 1383 return true; 1384} 1385 1386static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e) 1387{ 1388 uint16_t sse_prefixes; 1389 1390 if (!e->valid_prefix) { 1391 return true; 1392 } 1393 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { 1394 /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */ 1395 s->prefix &= ~PREFIX_DATA; 1396 } 1397 1398 /* Now, either zero or one bit is set in sse_prefixes. */ 1399 sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); 1400 return e->valid_prefix & (1 << sse_prefixes); 1401} 1402 1403static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func, 1404 X86DecodedInsn *decode) 1405{ 1406 X86OpEntry *e = &decode->e; 1407 1408 decode_func(s, env, e, &decode->b); 1409 while (e->is_decode) { 1410 e->is_decode = false; 1411 e->decode(s, env, e, &decode->b); 1412 } 1413 1414 if (!validate_sse_prefix(s, e)) { 1415 return false; 1416 } 1417 1418 /* First compute size of operands in order to initialize s->rip_offset. 
*/ 1419 if (e->op0 != X86_TYPE_None) { 1420 if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) { 1421 return false; 1422 } 1423 if (e->op0 == X86_TYPE_I) { 1424 s->rip_offset += 1 << decode->op[0].ot; 1425 } 1426 } 1427 if (e->op1 != X86_TYPE_None) { 1428 if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) { 1429 return false; 1430 } 1431 if (e->op1 == X86_TYPE_I) { 1432 s->rip_offset += 1 << decode->op[1].ot; 1433 } 1434 } 1435 if (e->op2 != X86_TYPE_None) { 1436 if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) { 1437 return false; 1438 } 1439 if (e->op2 == X86_TYPE_I) { 1440 s->rip_offset += 1 << decode->op[2].ot; 1441 } 1442 } 1443 if (e->op3 != X86_TYPE_None) { 1444 /* 1445 * A couple instructions actually use the extra immediate byte for an Lx 1446 * register operand; those are handled in the gen_* functions as one off. 1447 */ 1448 assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b); 1449 s->rip_offset += 1; 1450 } 1451 1452 if (e->op0 != X86_TYPE_None && 1453 !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) { 1454 return false; 1455 } 1456 1457 if (e->op1 != X86_TYPE_None && 1458 !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) { 1459 return false; 1460 } 1461 1462 if (e->op2 != X86_TYPE_None && 1463 !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) { 1464 return false; 1465 } 1466 1467 if (e->op3 != X86_TYPE_None) { 1468 decode->immediate = insn_get_signed(env, s, MO_8); 1469 } 1470 1471 return true; 1472} 1473 1474static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) 1475{ 1476 switch (cpuid) { 1477 case X86_FEAT_None: 1478 return true; 1479 case X86_FEAT_F16C: 1480 return (s->cpuid_ext_features & CPUID_EXT_F16C); 1481 case X86_FEAT_FMA: 1482 return (s->cpuid_ext_features & CPUID_EXT_FMA); 1483 case X86_FEAT_MOVBE: 1484 return (s->cpuid_ext_features & CPUID_EXT_MOVBE); 1485 case X86_FEAT_PCLMULQDQ: 1486 return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ); 1487 case X86_FEAT_SSE: 1488 return 
(s->cpuid_ext_features & CPUID_SSE); 1489 case X86_FEAT_SSE2: 1490 return (s->cpuid_ext_features & CPUID_SSE2); 1491 case X86_FEAT_SSE3: 1492 return (s->cpuid_ext_features & CPUID_EXT_SSE3); 1493 case X86_FEAT_SSSE3: 1494 return (s->cpuid_ext_features & CPUID_EXT_SSSE3); 1495 case X86_FEAT_SSE41: 1496 return (s->cpuid_ext_features & CPUID_EXT_SSE41); 1497 case X86_FEAT_SSE42: 1498 return (s->cpuid_ext_features & CPUID_EXT_SSE42); 1499 case X86_FEAT_AES: 1500 if (!(s->cpuid_ext_features & CPUID_EXT_AES)) { 1501 return false; 1502 } else if (!(s->prefix & PREFIX_VEX)) { 1503 return true; 1504 } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) { 1505 return false; 1506 } else { 1507 return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES); 1508 } 1509 1510 case X86_FEAT_AVX: 1511 return (s->cpuid_ext_features & CPUID_EXT_AVX); 1512 1513 case X86_FEAT_3DNOW: 1514 return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW); 1515 case X86_FEAT_SSE4A: 1516 return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A); 1517 1518 case X86_FEAT_ADX: 1519 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX); 1520 case X86_FEAT_BMI1: 1521 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1); 1522 case X86_FEAT_BMI2: 1523 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2); 1524 case X86_FEAT_AVX2: 1525 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); 1526 case X86_FEAT_SHA_NI: 1527 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); 1528 1529 case X86_FEAT_CMPCCXADD: 1530 return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); 1531 } 1532 g_assert_not_reached(); 1533} 1534 1535static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) 1536{ 1537 X86OpEntry *e = &decode->e; 1538 1539 switch (e->vex_special) { 1540 case X86_VEX_REPScalar: 1541 /* 1542 * Instructions which differ between 00/66 and F2/F3 in the 1543 * exception classification and the size of the memory operand. 
1544 */ 1545 assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); 1546 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { 1547 e->vex_class = e->vex_class < 4 ? 3 : 5; 1548 if (s->vex_l) { 1549 goto illegal; 1550 } 1551 assert(decode->e.s2 == X86_SIZE_x); 1552 if (decode->op[2].has_ea) { 1553 decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64; 1554 } 1555 } 1556 break; 1557 1558 case X86_VEX_SSEUnaligned: 1559 /* handled in sse_needs_alignment. */ 1560 break; 1561 1562 case X86_VEX_AVX2_256: 1563 if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) { 1564 goto illegal; 1565 } 1566 } 1567 1568 switch (e->vex_class) { 1569 case 0: 1570 if (s->prefix & PREFIX_VEX) { 1571 goto illegal; 1572 } 1573 return true; 1574 case 1: 1575 case 2: 1576 case 3: 1577 case 4: 1578 case 5: 1579 case 7: 1580 if (s->prefix & PREFIX_VEX) { 1581 if (!(s->flags & HF_AVX_EN_MASK)) { 1582 goto illegal; 1583 } 1584 } else if (e->special != X86_SPECIAL_MMX || 1585 (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { 1586 if (!(s->flags & HF_OSFXSR_MASK)) { 1587 goto illegal; 1588 } 1589 } 1590 break; 1591 case 12: 1592 /* Must have a VSIB byte and no address prefix. */ 1593 assert(s->has_modrm); 1594 if ((s->modrm & 7) != 4 || s->aflag == MO_16) { 1595 goto illegal; 1596 } 1597 1598 /* Check no overlap between registers. 
*/ 1599 if (!decode->op[0].has_ea && 1600 (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) { 1601 goto illegal; 1602 } 1603 assert(!decode->op[1].has_ea); 1604 if (decode->op[1].n == decode->mem.index) { 1605 goto illegal; 1606 } 1607 if (!decode->op[2].has_ea && 1608 (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) { 1609 goto illegal; 1610 } 1611 /* fall through */ 1612 case 6: 1613 case 11: 1614 if (!(s->prefix & PREFIX_VEX)) { 1615 goto illegal; 1616 } 1617 if (!(s->flags & HF_AVX_EN_MASK)) { 1618 goto illegal; 1619 } 1620 break; 1621 case 8: 1622 /* Non-VEX case handled in decode_0F77. */ 1623 assert(s->prefix & PREFIX_VEX); 1624 if (!(s->flags & HF_AVX_EN_MASK)) { 1625 goto illegal; 1626 } 1627 break; 1628 case 13: 1629 if (!(s->prefix & PREFIX_VEX)) { 1630 goto illegal; 1631 } 1632 if (s->vex_l) { 1633 goto illegal; 1634 } 1635 /* All integer instructions use VEX.vvvv, so exit. */ 1636 return true; 1637 } 1638 1639 if (s->vex_v != 0 && 1640 e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B && 1641 e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B && 1642 e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) { 1643 goto illegal; 1644 } 1645 1646 if (s->flags & HF_TS_MASK) { 1647 goto nm_exception; 1648 } 1649 if (s->flags & HF_EM_MASK) { 1650 goto illegal; 1651 } 1652 1653 if (e->check) { 1654 if (e->check & X86_CHECK_VEX128) { 1655 if (s->vex_l) { 1656 goto illegal; 1657 } 1658 } 1659 if (e->check & X86_CHECK_W0) { 1660 if (s->vex_w) { 1661 goto illegal; 1662 } 1663 } 1664 if (e->check & X86_CHECK_W1) { 1665 if (!s->vex_w) { 1666 goto illegal; 1667 } 1668 } 1669 } 1670 return true; 1671 1672nm_exception: 1673 gen_NM_exception(s); 1674 return false; 1675illegal: 1676 gen_illegal_opcode(s); 1677 return false; 1678} 1679 1680/* 1681 * Convert one instruction. s->base.is_jmp is set if the translation must 1682 * be stopped. 
*/
/*
 * Details for disas_insn_new():
 *
 * @s:   translation context.  Prefix bits, segment override, REX/VEX
 *       fields and the effective operand/address sizes (dflag/aflag)
 *       are accumulated in it while the prefixes are scanned.
 * @cpu: CPU being translated; only used here to obtain the CPUX86State.
 * @b:   first byte to examine.  It is used as-is on the first pass of
 *       the prefix loop; every further byte is fetched with
 *       x86_ldub_code().  A value >= 0x100 selects do_decode_0F with
 *       (b - 0x100) as the opcode (presumably the caller has already
 *       consumed a 0x0f escape byte -- confirm against the caller).
 *
 * Nothing is returned.  Failures end at one of the labels at the bottom
 * of the function (gp_fault, illegal_op, unknown_op), each of which emits
 * the corresponding exception, or return directly when validate_vex()
 * reports failure.
 *
 * The order of the checks below is significant: #UD checks come first,
 * then VEX validation, then #GP/intercept checks, and only then are
 * operands loaded and code generated.
 */
static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
{
    CPUX86State *env = cpu_env(cpu);
    /* True only for the first pass, where the caller-supplied b is used. */
    bool first = true;
    X86DecodedInsn decode;
    /* Table walker to start from; replaced when a VEX prefix or b >= 0x100 is seen. */
    X86DecodeFunc decode_func = decode_root;
    uint8_t cc_live;

    s->has_modrm = false;

 next_byte:
    if (first) {
        first = false;
    } else {
        b = x86_ldub_code(env, s);
    }
    /* Collect prefixes.  */
    switch (b) {
    case 0xf3:
        /* REPZ and REPNZ are mutually exclusive: the last one seen wins. */
        s->prefix |= PREFIX_REPZ;
        s->prefix &= ~PREFIX_REPNZ;
        goto next_byte;
    case 0xf2:
        s->prefix |= PREFIX_REPNZ;
        s->prefix &= ~PREFIX_REPZ;
        goto next_byte;
    case 0xf0:
        s->prefix |= PREFIX_LOCK;
        goto next_byte;
    /* Segment overrides: a later override simply replaces an earlier one. */
    case 0x2e:
        s->override = R_CS;
        goto next_byte;
    case 0x36:
        s->override = R_SS;
        goto next_byte;
    case 0x3e:
        s->override = R_DS;
        goto next_byte;
    case 0x26:
        s->override = R_ES;
        goto next_byte;
    case 0x64:
        s->override = R_FS;
        goto next_byte;
    case 0x65:
        s->override = R_GS;
        goto next_byte;
    case 0x66:
        s->prefix |= PREFIX_DATA;
        goto next_byte;
    case 0x67:
        s->prefix |= PREFIX_ADR;
        goto next_byte;
#ifdef TARGET_X86_64
    case 0x40 ... 0x4f:
        if (CODE64(s)) {
            /*
             * REX prefix.  W is kept as a 0/1 flag; R, X and B are each
             * moved to bit 3 (value 8) of their respective field.
             */
            s->prefix |= PREFIX_REX;
            s->vex_w = (b >> 3) & 1;
            s->rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            s->rex_b = (b & 0x1) << 3;
            goto next_byte;
        }
        /* Outside 64-bit code the byte is decoded as an ordinary opcode. */
        break;
#endif
    case 0xc5: /* 2-byte VEX */
    case 0xc4: /* 3-byte VEX */
        /*
         * VEX prefixes are only recognized when CODE32(s) is set and the
         * CPU is not in vm86 mode.  Otherwise the instruction is LES or LDS.
         */
        if (CODE32(s) && !VM86(s)) {
            /* Maps the 2-bit VEX.pp field to the equivalent legacy prefix. */
            static const int pp_prefix[4] = {
                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
            };
            /* vex2 is the byte following c4/c5; vex3 holds the wVVVVlpp fields. */
            int vex3, vex2 = x86_ldub_code(env, s);

            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                /*
                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
                 * otherwise the instruction is LES or LDS.
                 */
                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
                break;
            }

            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
                goto illegal_op;
            }
#ifdef TARGET_X86_64
            /* Bit 7 of vex2, inverted, becomes bit 3 of rex_r. */
            s->rex_r = (~vex2 >> 4) & 8;
#endif
            if (b == 0xc5) {
                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
                vex3 = vex2;
                decode_func = decode_0F;
            } else {
                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
                vex3 = x86_ldub_code(env, s);
#ifdef TARGET_X86_64
                /* Bits 6 and 5 of vex2, inverted, become bit 3 of rex_x/rex_b. */
                s->rex_x = (~vex2 >> 3) & 8;
                s->rex_b = (~vex2 >> 2) & 8;
#endif
                s->vex_w = (vex3 >> 7) & 1;
                /* The low five bits (mmmmm) select the opcode map. */
                switch (vex2 & 0x1f) {
                case 0x01: /* Implied 0f leading opcode bytes.  */
                    decode_func = decode_0F;
                    break;
                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
                    decode_func = decode_0F38;
                    break;
                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
                    decode_func = decode_0F3A;
                    break;
                default:   /* Reserved for future use.  */
                    goto unknown_op;
                }
            }
            /* vvvv is taken inverted from bits [6:3]; L is bit 2; pp is bits [1:0]. */
            s->vex_v = (~vex3 >> 3) & 0xf;
            s->vex_l = (vex3 >> 2) & 1;
            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
        }
        break;
    default:
        if (b >= 0x100) {
            /* Caller-supplied value above the byte range: see @b above. */
            b -= 0x100;
            decode_func = do_decode_0F;
        }
        break;
    }

    /* Post-process prefixes.  */
    if (CODE64(s)) {
        /*
         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
         * over 0x66 if both are present.
         */
        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
    } else {
        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
            s->dflag = MO_32;
        } else {
            s->dflag = MO_16;
        }
        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
            s->aflag = MO_32;
        } else {
            s->aflag = MO_16;
        }
    }

    /*
     * Start from an all-zero decode state.  cc_op == -1 is the sentinel
     * tested at the end of the function for "no flags writeback requested".
     */
    memset(&decode, 0, sizeof(decode));
    decode.cc_op = -1;
    decode.b = b;
    if (!decode_insn(s, env, decode_func, &decode)) {
        goto illegal_op;
    }
    /* A table entry without a generator function is reported as unknown. */
    if (!decode.e.gen) {
        goto unknown_op;
    }

    if (!has_cpuid_feature(s, decode.e.cpuid)) {
        goto illegal_op;
    }

    /* Checks that result in #UD come first.  */
    if (decode.e.check) {
        /* i64: rejected in 64-bit code. */
        if (decode.e.check & X86_CHECK_i64) {
            if (CODE64(s)) {
                goto illegal_op;
            }
        }
        /* o64: rejected outside 64-bit code. */
        if (decode.e.check & X86_CHECK_o64) {
            if (!CODE64(s)) {
                goto illegal_op;
            }
        }
        /* prot: rejected when PE(s) is clear or in vm86 mode. */
        if (decode.e.check & X86_CHECK_prot) {
            if (!PE(s) || VM86(s)) {
                goto illegal_op;
            }
        }
    }

    /* Per-instruction adjustments requested by the table entry. */
    switch (decode.e.special) {
    case X86_SPECIAL_None:
        break;

    case X86_SPECIAL_Locked:
        /*
         * Force the LOCK prefix when operand 0 is in memory, then treat
         * the instruction like any other that accepts LOCK.
         */
        if (decode.op[0].has_ea) {
            s->prefix |= PREFIX_LOCK;
        }
        decode.e.special = X86_SPECIAL_HasLock;
        /* fallthrough */
    case X86_SPECIAL_HasLock:
        break;

    case X86_SPECIAL_Op0_Rd:
        /* A register operand 0 is always accessed as 32 bits. */
        assert(decode.op[0].unit == X86_OP_INT);
        if (!decode.op[0].has_ea) {
            decode.op[0].ot = MO_32;
        }
        break;

    case X86_SPECIAL_Op2_Ry:
        /* A register operand 2 uses dflag, widened to 32 bits if dflag is 16. */
        assert(decode.op[2].unit == X86_OP_INT);
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
        }
        break;

    case X86_SPECIAL_AVXExtMov:
        /*
         * Register source: full vector width selected by VEX.L.
         * Memory source: step to the next larger size when VEX.L is set.
         */
        if (!decode.op[2].has_ea) {
            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
        } else if (s->vex_l) {
            decode.op[2].ot++;
        }
        break;

    case X86_SPECIAL_SExtT0:
    case X86_SPECIAL_ZExtT0:
        /* Handled in gen_load.  */
        assert(decode.op[1].unit == X86_OP_INT);
        break;

    default:
        break;
    }

    /*
     * LOCK (explicit or forced above) is only accepted on instructions
     * marked HasLock whose operand 0 is in memory.
     */
    if (s->prefix & PREFIX_LOCK) {
        if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
            goto illegal_op;
        }
    }

    /* On failure validate_vex() has already raised the exception itself. */
    if (!validate_vex(s, &decode)) {
        return;
    }

    /*
     * Checks that result in #GP or VMEXIT come second.  Intercepts are
     * generally checked after non-memory exceptions (i.e. before all
     * exceptions if there is no memory operand).  Exceptions are
     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
     *
     * RSM and XSETBV will be handled in the gen_* functions
     * instead of using chk().
     */
    if (decode.e.check & X86_CHECK_cpl0) {
        if (CPL(s) != 0) {
            goto gp_fault;
        }
    }
    if (decode.e.intercept && unlikely(GUEST(s))) {
        gen_helper_svm_check_intercept(tcg_env,
                                       tcg_constant_i32(decode.e.intercept));
    }
    if (decode.e.check) {
        /*
         * The vm86 IOPL check, when it applies, replaces the CPL/IOPL
         * check rather than being combined with it.
         */
        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
            if (IOPL(s) < 3) {
                goto gp_fault;
            }
        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
            if (IOPL(s) < CPL(s)) {
                goto gp_fault;
            }
        }
    }

    /* Only the unprefixed form of an MMX-capable instruction enters MMX state. */
    if (decode.e.special == X86_SPECIAL_MMX &&
        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
        gen_helper_enter_mmx(tcg_env);
    }

    /*
     * Compute the effective address once if any operand is in memory.
     * The last argument is true for VEX class 12, the class for which
     * validate_vex() requires a VSIB byte.
     */
    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
    }
    if (s->prefix & PREFIX_LOCK) {
        /*
         * Locked form: only operand 2 is loaded (into T1) and no writeback
         * is done here.  Presumably the gen function performs the atomic
         * read-modify-write itself -- confirm in the gen_* implementations.
         */
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
    } else {
        /* Resolve the destination vector register offset before loading. */
        if (decode.op[0].unit == X86_OP_MMX) {
            compute_mmx_offset(&decode.op[0]);
        } else if (decode.op[0].unit == X86_OP_SSE) {
            compute_xmm_offset(&decode.op[0]);
        }
        /* Operand 1 goes to T0, operand 2 to T1; operand 0 is written from T0. */
        gen_load(s, &decode, 1, s->T0);
        gen_load(s, &decode, 2, s->T1);
        decode.e.gen(s, env, &decode);
        gen_writeback(s, &decode, 0, s->T0);
    }

    /*
     * Write back flags after last memory access.  Some newer ALU instructions, as
     * well as SSE instructions, write flags in the gen_* function, but that can
     * cause incorrect tracking of CC_OP for instructions that write to both memory
     * and flags.
     */
    if (decode.cc_op != -1) {
        /* Copy only the flag inputs that were actually provided. */
        if (decode.cc_dst) {
            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
        }
        if (decode.cc_src) {
            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
        }
        if (decode.cc_src2) {
            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
        }
        if (decode.cc_op == CC_OP_DYNAMIC) {
            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
        }
        set_cc_op(s, decode.cc_op);
        cc_live = cc_op_live[decode.cc_op];
    } else {
        cc_live = 0;
    }
    /*
     * Consistency check for every non-dynamic cc_op (including the -1
     * sentinel, for which cc_live is 0): the flag inputs supplied must be
     * exactly those marked live in cc_op_live[], and no dynamic cc_op value
     * may have been set.
     */
    if (decode.cc_op != CC_OP_DYNAMIC) {
        assert(!decode.cc_op_dynamic);
        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
    }

    return;
 gp_fault:
    gen_exception_gpf(s);
    return;
 illegal_op:
    gen_illegal_opcode(s);
    return;
 unknown_op:
    gen_unknown_opcode(env, s);
}