/*
 * New-style TCG opcode generator for i386 instructions
 *
 * Copyright (c) 2022 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])

typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_ptr reg_c);
typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                TCGv_ptr reg_c, TCGv_ptr reg_d);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                TCGv_ptr reg_c, TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);
typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                 TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale);
typedef void (*SSEFunc_0_eppppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                 TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 flags);
typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                  TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
                                  TCGv_i32 odd);

static inline TCGv_i32 tcg_constant8u_i32(uint8_t val)
{
    return tcg_constant_i32(val);
}

static void gen_NM_exception(DisasContext *s)
{
    gen_exception(s, EXCP07_PREX);
}

static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib)
{
    TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib);
    gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override);
}

static inline int mmx_offset(MemOp ot)
{
    switch (ot) {
    case MO_8:
        return offsetof(MMXReg, MMX_B(0));
    case MO_16:
        return offsetof(MMXReg, MMX_W(0));
    case MO_32:
        return offsetof(MMXReg, MMX_L(0));
    case MO_64:
        return offsetof(MMXReg, MMX_Q(0));
    default:
        g_assert_not_reached();
    }
}

static inline int xmm_offset(MemOp ot)
{
    switch (ot) {
    case MO_8:
        return offsetof(ZMMReg, ZMM_B(0));
    case MO_16:
        return offsetof(ZMMReg, ZMM_W(0));
    case MO_32:
        return offsetof(ZMMReg, ZMM_L(0));
    case MO_64:
        return offsetof(ZMMReg, ZMM_Q(0));
    case MO_128:
        return offsetof(ZMMReg, ZMM_X(0));
    case MO_256:
        return offsetof(ZMMReg, ZMM_Y(0));
    default:
        g_assert_not_reached();
    }
}

static int vector_reg_offset(X86DecodedOp *op)
{
    assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE);

    if (op->unit == X86_OP_MMX) {
        return op->offset - mmx_offset(op->ot);
    } else {
        return op->offset - xmm_offset(op->ot);
    }
}

static int vector_elem_offset(X86DecodedOp *op, MemOp ot, int n)
{
    int base_ofs = vector_reg_offset(op);
    switch (ot) {
    case MO_8:
        if (op->unit == X86_OP_MMX) {
            return base_ofs + offsetof(MMXReg, MMX_B(n));
        } else {
            return base_ofs + offsetof(ZMMReg, ZMM_B(n));
        }
    case MO_16:
        if (op->unit == X86_OP_MMX) {
            return base_ofs + offsetof(MMXReg, MMX_W(n));
        } else {
            return base_ofs + offsetof(ZMMReg, ZMM_W(n));
        }
    case MO_32:
        if (op->unit == X86_OP_MMX) {
            return base_ofs + offsetof(MMXReg, MMX_L(n));
        } else {
            return base_ofs + offsetof(ZMMReg, ZMM_L(n));
        }
    case MO_64:
        if (op->unit == X86_OP_MMX) {
            return base_ofs;
        } else {
            return base_ofs + offsetof(ZMMReg, ZMM_Q(n));
        }
    case MO_128:
        assert(op->unit == X86_OP_SSE);
        return base_ofs + offsetof(ZMMReg, ZMM_X(n));
    case MO_256:
        assert(op->unit == X86_OP_SSE);
        return base_ofs + offsetof(ZMMReg, ZMM_Y(n));
    default:
        g_assert_not_reached();
    }
}

static void compute_mmx_offset(X86DecodedOp *op)
{
    if (!op->has_ea) {
        op->offset = offsetof(CPUX86State, fpregs[op->n].mmx) + mmx_offset(op->ot);
    } else {
        op->offset = offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot);
    }
}

static void compute_xmm_offset(X86DecodedOp *op)
{
    if (!op->has_ea) {
        op->offset = ZMM_OFFSET(op->n) + xmm_offset(op->ot);
    } else {
        op->offset = offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot);
    }
}

static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned)
{
    switch (ot) {
    case MO_8:
        gen_op_ld_v(s, MO_8, temp, s->A0);
        tcg_gen_st8_tl(temp, tcg_env, dest_ofs);
        break;
    case MO_16:
        gen_op_ld_v(s, MO_16, temp, s->A0);
        tcg_gen_st16_tl(temp, tcg_env, dest_ofs);
        break;
    case MO_32:
        gen_op_ld_v(s, MO_32, temp, s->A0);
        tcg_gen_st32_tl(temp, tcg_env, dest_ofs);
        break;
    case MO_64:
        gen_ldq_env_A0(s, dest_ofs);
        break;
    case MO_128:
        gen_ldo_env_A0(s, dest_ofs, aligned);
        break;
    case MO_256:
        gen_ldy_env_A0(s, dest_ofs, aligned);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool sse_needs_alignment(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
{
    switch (decode->e.vex_class) {
    case 2:
    case 4:
        if ((s->prefix & PREFIX_VEX) ||
            decode->e.vex_special == X86_VEX_SSEUnaligned) {
            /* MOST legacy SSE instructions require aligned memory operands, but not all. */
            return false;
        }
        /* fall through */
    case 1:
        return ot >= MO_128;

    default:
        return false;
    }
}

static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
        return;
    case X86_OP_SEG:
        tcg_gen_ld32u_tl(v, tcg_env,
                         offsetof(CPUX86State, segs[op->n].selector));
        break;
    case X86_OP_CR:
        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
        break;
    case X86_OP_DR:
        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n]));
        break;
    case X86_OP_INT:
        if (op->has_ea) {
            if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
                gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0);
            } else {
                gen_op_ld_v(s, op->ot, v, s->A0);
            }

        } else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) {
            if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
                tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8);
            } else {
                tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8);
            }

        } else if (op->ot < MO_TL && v == s->T0 &&
                   (decode->e.special == X86_SPECIAL_SExtT0 ||
                    decode->e.special == X86_SPECIAL_ZExtT0)) {
            if (decode->e.special == X86_SPECIAL_SExtT0) {
                tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN);
            } else {
                tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot);
            }

        } else {
            tcg_gen_mov_tl(v, cpu_regs[op->n]);
        }
        break;
    case X86_OP_IMM:
        tcg_gen_movi_tl(v, decode->immediate);
        break;

    case X86_OP_MMX:
        compute_mmx_offset(op);
        goto load_vector;

    case X86_OP_SSE:
        compute_xmm_offset(op);
    load_vector:
        if (op->has_ea) {
            bool aligned = sse_needs_alignment(s, decode, op->ot);
            gen_load_sse(s, v, op->ot, op->offset, aligned);
        }
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn)
{
    X86DecodedOp *op = &decode->op[opn];
    if (op->v_ptr) {
        return op->v_ptr;
    }
    op->v_ptr = tcg_temp_new_ptr();

    /* The temporary points to the MMXReg or ZMMReg. */
    tcg_gen_addi_ptr(op->v_ptr, tcg_env, vector_reg_offset(op));
    return op->v_ptr;
}

#define OP_PTR0 op_ptr(decode, 0)
#define OP_PTR1 op_ptr(decode, 1)
#define OP_PTR2 op_ptr(decode, 2)

static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];
    switch (op->unit) {
    case X86_OP_SKIP:
        break;
    case X86_OP_SEG:
        /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF. */
        gen_movl_seg_T0(s, op->n);
        break;
    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_st_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_reg_v(s, op->ot, op->n, v);
        }
        break;
    case X86_OP_MMX:
        break;
    case X86_OP_SSE:
        if (!op->has_ea && (s->prefix & PREFIX_VEX) && op->ot <= MO_128) {
            tcg_gen_gvec_dup_imm(MO_64,
                                 offsetof(CPUX86State, xmm_regs[op->n].ZMM_X(1)),
                                 16, 16, 0);
        }
        break;
    case X86_OP_CR:
    case X86_OP_DR:
    default:
        g_assert_not_reached();
    }
}

static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
{
    if (decode->e.special == X86_SPECIAL_MMX &&
        !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
        return 8;
    }
    return s->vex_l ? 32 : 16;
}

static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
{
    decode->cc_dst = s->T0;
    decode->cc_op = op;
}

static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
{
    decode->cc_src = s->T1;
    decode->cc_dst = s->T0;
    decode->cc_op = op;
}

static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
{
    MemOp ot = decode->op[0].ot;
    int vec_len = vector_len(s, decode);
    bool aligned = sse_needs_alignment(s, decode, ot);

    if (!decode->op[0].has_ea) {
        tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, vec_len, vec_len);
        return;
    }

    switch (ot) {
    case MO_64:
        gen_stq_env_A0(s, src_ofs);
        break;
    case MO_128:
        gen_sto_env_A0(s, src_ofs, aligned);
        break;
    case MO_256:
        gen_sty_env_A0(s, src_ofs, aligned);
        break;
    default:
        g_assert_not_reached();
    }
}

static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b)
{
    gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b);
}

#define FN_3DNOW_MOVE ((SSEFunc_0_epp) (uintptr_t) 1)
static const SSEFunc_0_epp fns_3dnow[] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = FN_3DNOW_MOVE, /* PFRCPIT1; no need to actually increase precision */
    [0xa7] = FN_3DNOW_MOVE, /* PFRSQIT1 */
    [0xb6] = FN_3DNOW_MOVE, /* PFRCPIT2 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgusb,
};

static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    uint8_t b = decode->immediate;
    SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? fns_3dnow[b] : NULL;

    if (!fn) {
        gen_illegal_opcode(s);
        return;
    }
    if (s->flags & HF_TS_MASK) {
        gen_NM_exception(s);
        return;
    }
    if (s->flags & HF_EM_MASK) {
        gen_illegal_opcode(s);
        return;
    }

    gen_helper_enter_mmx(tcg_env);
    if (fn == FN_3DNOW_MOVE) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset);
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset);
    } else {
        fn(tcg_env, OP_PTR0, OP_PTR1);
    }
}

/*
 * 00 = v*ps Vps, Hps, Wpd
 * 66 = v*pd Vpd, Hpd, Wps
 * f3 = v*ss Vss, Hss, Wps
 * f2 = v*sd Vsd, Hsd, Wps
 */
static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                    SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm,
                                    SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm,
                                    SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
{
    if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) {
        SSEFunc_0_eppp fn = s->prefix & PREFIX_REPZ ? ss : sd;
        if (!fn) {
            gen_illegal_opcode(s);
            return;
        }
        fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    } else {
        SSEFunc_0_epp ps, pd, fn;
        ps = s->vex_l ? ps_ymm : ps_xmm;
        pd = s->vex_l ? pd_ymm : pd_xmm;
        fn = s->prefix & PREFIX_DATA ? pd : ps;
        if (!fn) {
            gen_illegal_opcode(s);
            return;
        }
        fn(tcg_env, OP_PTR0, OP_PTR2);
    }
}
#define UNARY_FP_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_unary_fp_sse(s, env, decode, \
                     gen_helper_##lname##pd_xmm, \
                     gen_helper_##lname##ps_xmm, \
                     gen_helper_##lname##pd_ymm, \
                     gen_helper_##lname##ps_ymm, \
                     gen_helper_##lname##sd, \
                     gen_helper_##lname##ss); \
}
UNARY_FP_SSE(VSQRT, sqrt)

/*
 * 00 = v*ps Vps, Hps, Wpd
 * 66 = v*pd Vpd, Hpd, Wps
 * f3 = v*ss Vss, Hss, Wps
 * f2 = v*sd Vsd, Hsd, Wps
 */
static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                              SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
                              SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm,
                              SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
{
    SSEFunc_0_eppp ps, pd, fn;
    if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) {
        fn = s->prefix & PREFIX_REPZ ? ss : sd;
    } else {
        ps = s->vex_l ? ps_ymm : ps_xmm;
        pd = s->vex_l ? pd_ymm : pd_xmm;
        fn = s->prefix & PREFIX_DATA ? pd : ps;
    }
    if (fn) {
        fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    } else {
        gen_illegal_opcode(s);
    }
}

#define FP_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_fp_sse(s, env, decode, \
               gen_helper_##lname##pd_xmm, \
               gen_helper_##lname##ps_xmm, \
               gen_helper_##lname##pd_ymm, \
               gen_helper_##lname##ps_ymm, \
               gen_helper_##lname##sd, \
               gen_helper_##lname##ss); \
}
FP_SSE(VADD, add)
FP_SSE(VMUL, mul)
FP_SSE(VSUB, sub)
FP_SSE(VMIN, min)
FP_SSE(VDIV, div)
FP_SSE(VMAX, max)

#define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd) \
static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \
    SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \
    SSEFunc_0_eppppii fn = s->vex_l ? ymm : xmm; \
    \
    fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2, \
       tcg_constant_i32(even), \
       tcg_constant_i32((even) ^ (odd))); \
}

#define FMA_SSE(uname, ptr0, ptr1, ptr2, flags) \
FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags) \
static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss; \
    \
    fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2, \
       tcg_constant_i32(flags)); \
} \

FMA_SSE(VFMADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0)
FMA_SSE(VFMADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0)
FMA_SSE(VFMADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0)

FMA_SSE(VFNMADD231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_product)
FMA_SSE(VFNMADD213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_product)
FMA_SSE(VFNMADD132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_product)

FMA_SSE(VFMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c)
FMA_SSE(VFMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c)
FMA_SSE(VFMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c)

FMA_SSE(VFNMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c|float_muladd_negate_product)
FMA_SSE(VFNMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c|float_muladd_negate_product)
FMA_SSE(VFNMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c|float_muladd_negate_product)

FMA_SSE_PACKED(VFMADDSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c, 0)
FMA_SSE_PACKED(VFMADDSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c, 0)
FMA_SSE_PACKED(VFMADDSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c, 0)

FMA_SSE_PACKED(VFMSUBADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0, float_muladd_negate_c)
FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c)
FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c)

#define FP_UNPACK_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    /* PS maps to the DQ integer instruction, PD maps to QDQ. */ \
    gen_fp_sse(s, env, decode, \
               gen_helper_##lname##qdq_xmm, \
               gen_helper_##lname##dq_xmm, \
               gen_helper_##lname##qdq_ymm, \
               gen_helper_##lname##dq_ymm, \
               NULL, NULL); \
}
FP_UNPACK_SSE(VUNPCKLPx, punpckl)
FP_UNPACK_SSE(VUNPCKHPx, punpckh)

/*
 * 00 = v*ps Vps, Wpd
 * f3 = v*ss Vss, Wps
 */
static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                      SSEFunc_0_epp ps_xmm,
                                      SSEFunc_0_epp ps_ymm,
                                      SSEFunc_0_eppp ss)
{
    if ((s->prefix & (PREFIX_DATA | PREFIX_REPNZ)) != 0) {
        goto illegal_op;
    } else if (s->prefix & PREFIX_REPZ) {
        if (!ss) {
            goto illegal_op;
        }
        ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    } else {
        SSEFunc_0_epp fn = s->vex_l ? ps_ymm : ps_xmm;
        if (!fn) {
            goto illegal_op;
        }
        fn(tcg_env, OP_PTR0, OP_PTR2);
    }
    return;

illegal_op:
    gen_illegal_opcode(s);
}
#define UNARY_FP32_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_unary_fp32_sse(s, env, decode, \
                       gen_helper_##lname##ps_xmm, \
                       gen_helper_##lname##ps_ymm, \
                       gen_helper_##lname##ss); \
}
UNARY_FP32_SSE(VRSQRT, rsqrt)
UNARY_FP32_SSE(VRCP, rcp)

/*
 * 66 = v*pd Vpd, Hpd, Wpd
 * f2 = v*ps Vps, Hps, Wps
 */
static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                         SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
                                         SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm)
{
    SSEFunc_0_eppp ps, pd, fn;
    ps = s->vex_l ? ps_ymm : ps_xmm;
    pd = s->vex_l ? pd_ymm : pd_xmm;
    fn = s->prefix & PREFIX_DATA ? pd : ps;
    fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
#define HORIZONTAL_FP_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_horizontal_fp_sse(s, env, decode, \
                          gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm, \
                          gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \
}
HORIZONTAL_FP_SSE(VHADD, hadd)
HORIZONTAL_FP_SSE(VHSUB, hsub)
HORIZONTAL_FP_SSE(VADDSUB, addsub)

static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                   int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm)
{
    SSEFunc_0_epppp fn = s->vex_l ? ymm : xmm;
    TCGv_ptr ptr3 = tcg_temp_new_ptr();

    /* The format of the fourth input is Lx */
    tcg_gen_addi_ptr(ptr3, tcg_env, ZMM_OFFSET(op3));
    fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3);
}
#define TERNARY_SSE(uname, uvname, lname) \
static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, \
                    gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \
} \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_ternary_sse(s, env, decode, 0, \
                    gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \
}
TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps)
TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd)
TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb)

static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                      SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    if (!s->vex_l) {
        xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
    } else {
        ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
    }
}

#define BINARY_IMM_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_binary_imm_sse(s, env, decode, \
                       gen_helper_##lname##_xmm, \
                       gen_helper_##lname##_ymm); \
}

BINARY_IMM_SSE(VBLENDPD, blendpd)
BINARY_IMM_SSE(VBLENDPS, blendps)
BINARY_IMM_SSE(VPBLENDW, pblendw)
BINARY_IMM_SSE(VDDPS, dpps)
#define gen_helper_dppd_ymm NULL
BINARY_IMM_SSE(VDDPD, dppd)
BINARY_IMM_SSE(VMPSADBW, mpsadbw)
BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq)


#define UNARY_INT_GVEC(uname, func, ...) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    int vec_len = vector_len(s, decode); \
    \
    func(__VA_ARGS__, decode->op[0].offset, \
         decode->op[2].offset, vec_len, vec_len); \
}
UNARY_INT_GVEC(PABSB, tcg_gen_gvec_abs, MO_8)
UNARY_INT_GVEC(PABSW, tcg_gen_gvec_abs, MO_16)
UNARY_INT_GVEC(PABSD, tcg_gen_gvec_abs, MO_32)
UNARY_INT_GVEC(VBROADCASTx128, tcg_gen_gvec_dup_mem, MO_128)
UNARY_INT_GVEC(VPBROADCASTB, tcg_gen_gvec_dup_mem, MO_8)
UNARY_INT_GVEC(VPBROADCASTW, tcg_gen_gvec_dup_mem, MO_16)
UNARY_INT_GVEC(VPBROADCASTD, tcg_gen_gvec_dup_mem, MO_32)
UNARY_INT_GVEC(VPBROADCASTQ, tcg_gen_gvec_dup_mem, MO_64)


#define BINARY_INT_GVEC(uname, func, ...) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    int vec_len = vector_len(s, decode); \
    \
    func(__VA_ARGS__, \
         decode->op[0].offset, decode->op[1].offset, \
         decode->op[2].offset, vec_len, vec_len); \
}

BINARY_INT_GVEC(PADDB, tcg_gen_gvec_add, MO_8)
BINARY_INT_GVEC(PADDW, tcg_gen_gvec_add, MO_16)
BINARY_INT_GVEC(PADDD, tcg_gen_gvec_add, MO_32)
BINARY_INT_GVEC(PADDQ, tcg_gen_gvec_add, MO_64)
BINARY_INT_GVEC(PADDSB, tcg_gen_gvec_ssadd, MO_8)
BINARY_INT_GVEC(PADDSW, tcg_gen_gvec_ssadd, MO_16)
BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8)
BINARY_INT_GVEC(PADDUSW, tcg_gen_gvec_usadd, MO_16)
BINARY_INT_GVEC(PAND, tcg_gen_gvec_and, MO_64)
BINARY_INT_GVEC(PCMPEQB, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_8)
BINARY_INT_GVEC(PCMPEQD, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_32)
BINARY_INT_GVEC(PCMPEQW, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_16)
BINARY_INT_GVEC(PCMPEQQ, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_64)
BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8)
BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16)
BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32)
BINARY_INT_GVEC(PCMPGTQ, tcg_gen_gvec_cmp, TCG_COND_GT, MO_64)
BINARY_INT_GVEC(PMAXSB, tcg_gen_gvec_smax, MO_8)
BINARY_INT_GVEC(PMAXSW, tcg_gen_gvec_smax, MO_16)
BINARY_INT_GVEC(PMAXSD, tcg_gen_gvec_smax, MO_32)
BINARY_INT_GVEC(PMAXUB, tcg_gen_gvec_umax, MO_8)
BINARY_INT_GVEC(PMAXUW, tcg_gen_gvec_umax, MO_16)
BINARY_INT_GVEC(PMAXUD, tcg_gen_gvec_umax, MO_32)
BINARY_INT_GVEC(PMINSB, tcg_gen_gvec_smin, MO_8)
BINARY_INT_GVEC(PMINSW, tcg_gen_gvec_smin, MO_16)
BINARY_INT_GVEC(PMINSD, tcg_gen_gvec_smin, MO_32)
BINARY_INT_GVEC(PMINUB, tcg_gen_gvec_umin, MO_8)
BINARY_INT_GVEC(PMINUW, tcg_gen_gvec_umin, MO_16)
BINARY_INT_GVEC(PMINUD, tcg_gen_gvec_umin, MO_32)
BINARY_INT_GVEC(PMULLW, tcg_gen_gvec_mul, MO_16)
BINARY_INT_GVEC(PMULLD, tcg_gen_gvec_mul, MO_32)
BINARY_INT_GVEC(POR, tcg_gen_gvec_or, MO_64)
BINARY_INT_GVEC(PSUBB, tcg_gen_gvec_sub, MO_8)
BINARY_INT_GVEC(PSUBW, tcg_gen_gvec_sub, MO_16)
BINARY_INT_GVEC(PSUBD, tcg_gen_gvec_sub, MO_32)
BINARY_INT_GVEC(PSUBQ, tcg_gen_gvec_sub, MO_64)
BINARY_INT_GVEC(PSUBSB, tcg_gen_gvec_sssub, MO_8)
BINARY_INT_GVEC(PSUBSW, tcg_gen_gvec_sssub, MO_16)
BINARY_INT_GVEC(PSUBUSB, tcg_gen_gvec_ussub, MO_8)
BINARY_INT_GVEC(PSUBUSW, tcg_gen_gvec_ussub, MO_16)
BINARY_INT_GVEC(PXOR, tcg_gen_gvec_xor, MO_64)


/*
 * 00 = p*  Pq, Qq (if mmx not NULL; no VEX)
 * 66 = vp* Vx, Hx, Wx
 *
 * These are really the same encoding, because 1) V is the same as P when VEX.V
 * is not present 2) P and Q are the same as H and W apart from MM/XMM
 */
static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                      SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm)
{
    assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX));

    if (mmx && (s->prefix & PREFIX_VEX) && !(s->prefix & PREFIX_DATA)) {
        /* VEX encoding is not applicable to MMX instructions. */
        gen_illegal_opcode(s);
        return;
    }
    if (!(s->prefix & PREFIX_DATA)) {
        mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    } else if (!s->vex_l) {
        xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    } else {
        ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
    }
}


#define BINARY_INT_MMX(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_binary_int_sse(s, env, decode, \
                       gen_helper_##lname##_mmx, \
                       gen_helper_##lname##_xmm, \
                       gen_helper_##lname##_ymm); \
}
BINARY_INT_MMX(PUNPCKLBW, punpcklbw)
BINARY_INT_MMX(PUNPCKLWD, punpcklwd)
BINARY_INT_MMX(PUNPCKLDQ, punpckldq)
BINARY_INT_MMX(PACKSSWB, packsswb)
BINARY_INT_MMX(PACKUSWB, packuswb)
BINARY_INT_MMX(PUNPCKHBW, punpckhbw)
BINARY_INT_MMX(PUNPCKHWD, punpckhwd)
BINARY_INT_MMX(PUNPCKHDQ, punpckhdq)
BINARY_INT_MMX(PACKSSDW, packssdw)

BINARY_INT_MMX(PAVGB, pavgb)
BINARY_INT_MMX(PAVGW, pavgw)
BINARY_INT_MMX(PMADDWD, pmaddwd)
BINARY_INT_MMX(PMULHUW, pmulhuw)
BINARY_INT_MMX(PMULHW, pmulhw)
BINARY_INT_MMX(PMULUDQ, pmuludq)
BINARY_INT_MMX(PSADBW, psadbw)

BINARY_INT_MMX(PSLLW_r, psllw)
BINARY_INT_MMX(PSLLD_r, pslld)
BINARY_INT_MMX(PSLLQ_r, psllq)
BINARY_INT_MMX(PSRLW_r, psrlw)
BINARY_INT_MMX(PSRLD_r, psrld)
BINARY_INT_MMX(PSRLQ_r, psrlq)
BINARY_INT_MMX(PSRAW_r, psraw)
BINARY_INT_MMX(PSRAD_r, psrad)

BINARY_INT_MMX(PHADDW, phaddw)
BINARY_INT_MMX(PHADDSW, phaddsw)
BINARY_INT_MMX(PHADDD, phaddd)
BINARY_INT_MMX(PHSUBW, phsubw)
BINARY_INT_MMX(PHSUBSW, phsubsw)
BINARY_INT_MMX(PHSUBD, phsubd)
BINARY_INT_MMX(PMADDUBSW, pmaddubsw)
BINARY_INT_MMX(PSHUFB, pshufb)
BINARY_INT_MMX(PSIGNB, psignb)
BINARY_INT_MMX(PSIGNW, psignw)
BINARY_INT_MMX(PSIGND, psignd)
BINARY_INT_MMX(PMULHRSW, pmulhrsw)

/* Instructions with no MMX equivalent. */
#define BINARY_INT_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_binary_int_sse(s, env, decode, \
                       NULL, \
                       gen_helper_##lname##_xmm, \
                       gen_helper_##lname##_ymm); \
}

/* Instructions with no MMX equivalent. */
BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq)
BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq)
BINARY_INT_SSE(VPACKUSDW, packusdw)
BINARY_INT_SSE(VPERMILPS, vpermilps)
BINARY_INT_SSE(VPERMILPD, vpermilpd)
BINARY_INT_SSE(VMASKMOVPS, vpmaskmovd)
BINARY_INT_SSE(VMASKMOVPD, vpmaskmovq)

BINARY_INT_SSE(PMULDQ, pmuldq)

BINARY_INT_SSE(VAESDEC, aesdec)
BINARY_INT_SSE(VAESDECLAST, aesdeclast)
BINARY_INT_SSE(VAESENC, aesenc)
BINARY_INT_SSE(VAESENCLAST, aesenclast)

#define UNARY_CMP_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    if (!s->vex_l) { \
        gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2); \
    } else { \
        gen_helper_##lname##_ymm(tcg_env, OP_PTR1, OP_PTR2); \
    } \
    set_cc_op(s, CC_OP_EFLAGS); \
}
UNARY_CMP_SSE(VPTEST, ptest)
UNARY_CMP_SSE(VTESTPS, vtestps)
UNARY_CMP_SSE(VTESTPD, vtestpd)

static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                     SSEFunc_0_epp xmm, SSEFunc_0_epp ymm)
{
    if (!s->vex_l) {
        xmm(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        ymm(tcg_env, OP_PTR0, OP_PTR2);
    }
}

#define UNARY_INT_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_unary_int_sse(s, env, decode, \
                      gen_helper_##lname##_xmm, \
                      gen_helper_##lname##_ymm); \
}

UNARY_INT_SSE(VPMOVSXBW, pmovsxbw)
UNARY_INT_SSE(VPMOVSXBD, pmovsxbd)
UNARY_INT_SSE(VPMOVSXBQ, pmovsxbq)
UNARY_INT_SSE(VPMOVSXWD, pmovsxwd)
UNARY_INT_SSE(VPMOVSXWQ, pmovsxwq)
UNARY_INT_SSE(VPMOVSXDQ, pmovsxdq)

UNARY_INT_SSE(VPMOVZXBW, pmovzxbw)
UNARY_INT_SSE(VPMOVZXBD, pmovzxbd)
UNARY_INT_SSE(VPMOVZXBQ, pmovzxbq)
UNARY_INT_SSE(VPMOVZXWD, pmovzxwd)
UNARY_INT_SSE(VPMOVZXWQ, pmovzxwq)
UNARY_INT_SSE(VPMOVZXDQ, pmovzxdq)

UNARY_INT_SSE(VMOVSLDUP, pmovsldup)
UNARY_INT_SSE(VMOVSHDUP, pmovshdup)
UNARY_INT_SSE(VMOVDDUP, pmovdldup)

UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd)
UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq)
UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq)
UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps)
UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq)
UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq)
UNARY_INT_SSE(VCVTPH2PS, cvtph2ps)


static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                     SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    if (!s->vex_l) {
        xmm(OP_PTR0, OP_PTR1, imm);
    } else {
        ymm(OP_PTR0, OP_PTR1, imm);
    }
}

#define UNARY_IMM_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_unary_imm_sse(s, env, decode, \
                      gen_helper_##lname##_xmm, \
                      gen_helper_##lname##_ymm); \
}

UNARY_IMM_SSE(PSHUFD, pshufd)
UNARY_IMM_SSE(PSHUFHW, pshufhw)
UNARY_IMM_SSE(PSHUFLW, pshuflw)
#define gen_helper_vpermq_xmm NULL
UNARY_IMM_SSE(VPERMQ, vpermq)
UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm)
UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm)

static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                        SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    if (!s->vex_l) {
        xmm(tcg_env, OP_PTR0, OP_PTR1, imm);
    } else {
        ymm(tcg_env, OP_PTR0, OP_PTR1, imm);
    }
}

#define UNARY_IMM_FP_SSE(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_unary_imm_fp_sse(s, env, decode, \
                         gen_helper_##lname##_xmm, \
                         gen_helper_##lname##_ymm); \
}

UNARY_IMM_FP_SSE(VROUNDPS, roundps)
UNARY_IMM_FP_SSE(VROUNDPD, roundpd)

static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm,
                                SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm)
{
    SSEFunc_0_eppp d = s->vex_l ? d_ymm : d_xmm;
    SSEFunc_0_eppp q = s->vex_l ? q_ymm : q_xmm;
    SSEFunc_0_eppp fn = s->vex_w ? q : d;
    fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}

/* VEX.W affects whether to operate on 32- or 64-bit elements. */
#define VEXW_AVX(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_vexw_avx(s, env, decode, \
                 gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \
                 gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \
}
VEXW_AVX(VPSLLV, vpsllv)
VEXW_AVX(VPSRLV, vpsrlv)
VEXW_AVX(VPSRAV, vpsrav)
VEXW_AVX(VPMASKMOV, vpmaskmov)

/* Same as above, but with extra arguments to the helper. */
static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm,
                                SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm)
{
    SSEFunc_0_epppti d = s->vex_l ? d_ymm : d_xmm;
    SSEFunc_0_epppti q = s->vex_l ? q_ymm : q_xmm;
    SSEFunc_0_epppti fn = s->vex_w ? q : d;
    TCGv_i32 scale = tcg_constant_i32(decode->mem.scale);
    TCGv_ptr index = tcg_temp_new_ptr();

    /* Pass third input as (index, base, scale) */
    tcg_gen_addi_ptr(index, tcg_env, ZMM_OFFSET(decode->mem.index));
    fn(tcg_env, OP_PTR0, OP_PTR1, index, s->A0, scale);

    /*
     * There are two output operands, so zero OP1's high 128 bits
     * in the VEX.128 case.
     */
    if (!s->vex_l) {
        int ymmh_ofs = vector_elem_offset(&decode->op[1], MO_128, 1);
        tcg_gen_gvec_dup_imm(MO_64, ymmh_ofs, 16, 16, 0);
    }
}
#define VSIB_AVX(uname, lname) \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{ \
    gen_vsib_avx(s, env, decode, \
                 gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \
                 gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \
}
VSIB_AVX(VPGATHERD, vpgatherd)
VSIB_AVX(VPGATHERQ, vpgatherq)

/* ADCX/ADOX do not have memory operands and can use set_cc_op. */
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
{
    int opposite_cc_op;
    TCGv carry_in = NULL;
    TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
    TCGv zero;

    if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
        /* Re-use the carry-out from a previous round. */
        carry_in = carry_out;
    } else {
        /* We don't have a carry-in, get it out of EFLAGS. */
        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
            gen_compute_eflags(s);
        }
        carry_in = s->tmp0;
        tcg_gen_extract_tl(carry_in, cpu_cc_src,
                           ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
    }

    switch (ot) {
#ifdef TARGET_X86_64
    case MO_32:
        /* If TL is 64-bit just do everything in 64-bit arithmetic. */
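        /*
         * Note: after zero-extending both 32-bit operands (and adding the 0/1
         * carry-in), the 64-bit sum fits in 33 bits, so bit 32 of the result
         * is exactly the carry-out extracted by the shift below.
         */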
        tcg_gen_ext32u_tl(s->T0, s->T0);
        tcg_gen_ext32u_tl(s->T1, s->T1);
        tcg_gen_add_i64(s->T0, s->T0, s->T1);
        tcg_gen_add_i64(s->T0, s->T0, carry_in);
        tcg_gen_shri_i64(carry_out, s->T0, 32);
        break;
#endif
    default:
        zero = tcg_constant_tl(0);
        tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
        tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
        break;
    }

    opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
    if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
        /* Merge with the carry-out from the opposite instruction. */
        set_cc_op(s, CC_OP_ADCOX);
    } else {
        set_cc_op(s, cc_op);
    }
}

static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
}

static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
}

static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    tcg_gen_andc_tl(s->T0, s->T1, s->T0);
    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}

static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
    TCGv zero = tcg_constant_tl(0);
    TCGv mone = tcg_constant_tl(-1);

    /*
     * Extract START, and shift the operand.
     * Shifts larger than operand size get zeros.
     */
    tcg_gen_ext8u_tl(s->A0, s->T1);
    tcg_gen_shr_tl(s->T0, s->T0, s->A0);

    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);

    /*
     * Extract the LEN into an inverse mask.  Lengths larger than
     * operand size get all zeros, length 0 gets all ones.
     */
    tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
    tcg_gen_shl_tl(s->T1, mone, s->A0);
    tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
    tcg_gen_andc_tl(s->T0, s->T0, s->T1);

    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}

/* BLSI does not have memory operands and can use set_cc_op. */
static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    tcg_gen_mov_tl(cpu_cc_src, s->T0);
    tcg_gen_neg_tl(s->T1, s->T0);
    tcg_gen_and_tl(s->T0, s->T0, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    set_cc_op(s, CC_OP_BMILGB + ot);
}

/* BLSMSK does not have memory operands and can use set_cc_op. */
static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    tcg_gen_mov_tl(cpu_cc_src, s->T0);
    tcg_gen_subi_tl(s->T1, s->T0, 1);
    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    set_cc_op(s, CC_OP_BMILGB + ot);
}

/* BLSR does not have memory operands and can use set_cc_op. */
static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    tcg_gen_mov_tl(cpu_cc_src, s->T0);
    tcg_gen_subi_tl(s->T1, s->T0, 1);
    tcg_gen_and_tl(s->T0, s->T0, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    set_cc_op(s, CC_OP_BMILGB + ot);
}

static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
    TCGv zero = tcg_constant_tl(0);
    TCGv mone = tcg_constant_tl(-1);

    tcg_gen_ext8u_tl(s->T1, s->T1);

    tcg_gen_shl_tl(s->A0, mone, s->T1);
    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
    tcg_gen_andc_tl(s->T0, s->T0, s->A0);
    /*
     * Note that since we're using BMILG (in order to get O
     * cleared) we need to store the inverse into C.
     */
    tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound);
    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}

static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGLabel *label_top = gen_new_label();
    TCGLabel *label_bottom = gen_new_label();
    TCGv oldv = tcg_temp_new();
    TCGv newv = tcg_temp_new();
    TCGv cmpv = tcg_temp_new();
    TCGCond cond;

    TCGv cmp_lhs, cmp_rhs;
    MemOp ot, ot_full;

    int jcc_op = (decode->b >> 1) & 7;
    static const TCGCond cond_table[8] = {
        [JCC_O] = TCG_COND_LT,  /* test sign bit by comparing against 0 */
        [JCC_B] = TCG_COND_LTU,
        [JCC_Z] = TCG_COND_EQ,
        [JCC_BE] = TCG_COND_LEU,
        [JCC_S] = TCG_COND_LT,  /* test sign bit by comparing against 0 */
        [JCC_P] = TCG_COND_EQ,  /* even parity - tests low bit of popcount */
        [JCC_L] = TCG_COND_LT,
        [JCC_LE] = TCG_COND_LE,
    };

    cond = cond_table[jcc_op];
    if (decode->b & 1) {
        cond = tcg_invert_cond(cond);
    }

    ot = decode->op[0].ot;
    ot_full = ot | MO_LE;
    if (jcc_op >= JCC_S) {
        /*
         * Sign-extend values before subtracting for S, P (zero/sign extension
         * does not matter there), L, LE and their inverses.
         */
        ot_full |= MO_SIGN;
    }

    /*
     * cmpv will be moved to cc_src *after* cpu_regs[] is written back, so use
     * tcg_gen_ext_tl instead of gen_ext_tl.
     */
    tcg_gen_ext_tl(cmpv, cpu_regs[decode->op[1].n], ot_full);

    /*
     * Cmpxchg loop starts here.
     * - s->T1: addition operand (from decoder)
     * - s->A0: dest address (from decoder)
     * - s->cc_srcT: memory operand (lhs for comparison)
     * - cmpv: rhs for comparison
     */
    gen_set_label(label_top);
    gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0);
    tcg_gen_sub_tl(s->T0, s->cc_srcT, cmpv);

    /* Compute the comparison result by hand, to avoid clobbering cc_*. */
    switch (jcc_op) {
    case JCC_O:
        /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */
        tcg_gen_xor_tl(newv, s->cc_srcT, s->T0);
        tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv);
        tcg_gen_and_tl(s->tmp0, s->tmp0, newv);
        tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot);
        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
        break;

    case JCC_P:
        tcg_gen_ext8u_tl(s->tmp0, s->T0);
        tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
        tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
        break;

    case JCC_S:
        tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot);
        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
        break;

    default:
        cmp_lhs = s->cc_srcT, cmp_rhs = cmpv;
        break;
    }

    /* Compute new value: if condition does not hold, just store back s->cc_srcT */
    tcg_gen_add_tl(newv, s->cc_srcT, s->T1);
    tcg_gen_movcond_tl(cond, newv, cmp_lhs, cmp_rhs, newv, s->cc_srcT);
    tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, s->cc_srcT, newv, s->mem_index, ot_full);

    /* Exit unconditionally if cmpxchg succeeded. */
    tcg_gen_brcond_tl(TCG_COND_EQ, oldv, s->cc_srcT, label_bottom);

    /* Try again if there was actually a store to make. */
    tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, label_top);
    gen_set_label(label_bottom);

    /* Store old value to registers only after a successful store. */
    gen_writeback(s, decode, 1, s->cc_srcT);

    decode->cc_dst = s->T0;
    decode->cc_src = cmpv;
    decode->cc_op = CC_OP_SUBB + ot;
}

static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[2].ot;

    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
    gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
}

static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_enter_mmx(tcg_env);
    if (s->prefix & PREFIX_DATA) {
        gen_helper_cvtpi2pd(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        gen_helper_cvtpi2ps(tcg_env, OP_PTR0, OP_PTR2);
    }
}

static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_enter_mmx(tcg_env);
    if (s->prefix & PREFIX_DATA) {
        gen_helper_cvtpd2pi(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        gen_helper_cvtps2pi(tcg_env, OP_PTR0, OP_PTR2);
    }
}

static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_enter_mmx(tcg_env);
    if (s->prefix & PREFIX_DATA) {
        gen_helper_cvttpd2pi(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        gen_helper_cvttps2pi(tcg_env, OP_PTR0, OP_PTR2);
    }
}

static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_emms(tcg_env);
}

static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
    TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);

    gen_helper_extrq_i(tcg_env, OP_PTR0, index, length);
}

static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}

static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
    TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);

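    /*
     * Note: SSE4a INSERTQ places the low 'length' bits of the source at bit
     * position 'index' of the destination; both fields come from the
     * immediate bytes unpacked above.
     */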
    gen_helper_insertq_i(tcg_env, OP_PTR0, OP_PTR1, index, length);
}

static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
}

static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
    gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
}

static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override);

    if (s->prefix & PREFIX_DATA) {
        gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0);
    } else {
        gen_helper_maskmov_mmx(tcg_env, OP_PTR1, OP_PTR2, s->A0);
    }
}

static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    /* M operand type does not load/store */
    if (decode->e.op0 == X86_TYPE_M) {
        tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
    } else {
        tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
    }
}

static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[2].ot;

    switch (ot) {
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_ld32u_tl(s->T0, tcg_env, decode->op[2].offset);
        break;
    case MO_64:
#endif
        tcg_gen_ld_tl(s->T0, tcg_env, decode->op[2].offset);
        break;
    default:
        abort();
    }
}

static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[2].ot;
    int vec_len = vector_len(s, decode);
    int lo_ofs = vector_elem_offset(&decode->op[0], ot, 0);

    tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);

    switch (ot) {
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_st32_tl(s->T1, tcg_env, lo_ofs);
        break;
    case MO_64:
#endif
        tcg_gen_st_tl(s->T1, tcg_env, lo_ofs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_store_sse(s, decode, decode->op[2].offset);
}

static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn;
    ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm;
    pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm;
    fn = s->prefix & PREFIX_DATA ? pd : ps;
    fn(s->tmp2_i32, tcg_env, OP_PTR2);
    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
}

static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);
    int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0);

    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset);
    if (decode->op[0].has_ea) {
        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
    } else {
        /*
         * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_64) would
         * seem to work, but it does not on big-endian platforms; the cleared parts
         * are always at higher addresses, but cross-endian emulation inverts the
         * byte order so that the cleared parts need to be at *lower* addresses.
         * Because oprsz is 8, we see this here even for SSE; but more in general,
         * it disqualifies using oprsz < maxsz to emulate VEX128.
         */
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, lo_ofs);
    }
}

static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_enter_mmx(tcg_env);
    /* Otherwise the same as any other movq. */
    return gen_MOVQ(s, env, decode);
}

static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;

    /* low part of result in VEX.vvvv, high in MODRM */
    switch (ot) {
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
                          s->tmp2_i32, s->tmp3_i32);
        tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
        tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32);
        break;

    case MO_64:
#endif
        tcg_gen_mulu2_tl(cpu_regs[s->vex_v], s->T0, s->T0, s->T1);
        break;

    default:
        g_assert_not_reached();
    }
}

static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    if (!(s->prefix & PREFIX_DATA)) {
        gen_helper_palignr_mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
    } else if (!s->vex_l) {
        gen_helper_palignr_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
    } else {
        gen_helper_palignr_ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
    }
}

static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    /* Careful, operand order is reversed! */
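    /*
     * Note: PANDN computes ~first_source & second_source, while
     * tcg_gen_gvec_andc computes a & ~b, so op[2] is passed as 'a' and
     * op[1] as 'b'.
     */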
    tcg_gen_gvec_andc(MO_64,
                      decode->op[0].offset, decode->op[2].offset,
                      decode->op[1].offset, vec_len, vec_len);
}

static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
    set_cc_op(s, CC_OP_EFLAGS);
}

static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
    set_cc_op(s, CC_OP_EFLAGS);
    if ((s->prefix & PREFIX_VEX) && !s->vex_l) {
        tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)),
                             16, 16, 0);
    }
}

static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
    set_cc_op(s, CC_OP_EFLAGS);
}

static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
    set_cc_op(s, CC_OP_EFLAGS);
    if ((s->prefix & PREFIX_VEX) && !s->vex_l) {
        tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)),
                             16, 16, 0);
    }
}

static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_pdep(s->T0, s->T0, s->T1);
}

static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_pext(s->T0, s->T0, s->T1);
}

static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
{
    int vec_len = vector_len(s, decode);
    int mask = (vec_len >> ot) - 1;
    int val = decode->immediate & mask;

    switch (ot) {
    case MO_8:
        tcg_gen_ld8u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val));
        break;
    case MO_16:
        tcg_gen_ld16u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val));
        break;
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_ld32u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val));
        break;
    case MO_64:
#endif
        tcg_gen_ld_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val));
        break;
    default:
        abort();
    }
}

static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pextr(s, env, decode, MO_8);
}

static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pextr(s, env, decode, MO_16);
}

static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    gen_pextr(s, env, decode, ot);
}

static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
{
    int vec_len = vector_len(s, decode);
    int mask = (vec_len >> ot) - 1;
    int val = decode->immediate & mask;

    if (decode->op[1].offset != decode->op[0].offset) {
        assert(vec_len == 16);
        gen_store_sse(s, decode, decode->op[1].offset);
    }

    switch (ot) {
    case MO_8:
        tcg_gen_st8_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val));
        break;
    case MO_16:
        tcg_gen_st16_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val));
        break;
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_st32_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val));
        break;
    case MO_64:
#endif
        tcg_gen_st_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val));
        break;
    default:
        abort();
    }
}

static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pinsr(s, env, decode, MO_8);
}

static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pinsr(s, env, decode, MO_16);
}

static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pinsr(s, env, decode, decode->op[2].ot);
}

static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andi_i64(d, s, 0x8080808080808080ull);

    /*
     * After each shift+or pair:
     * 0:  a.......b.......c.......d.......e.......f.......g.......h.......
     * 7:  ab......bc......cd......de......ef......fg......gh......h.......
     * 14: abcd....bcde....cdef....defg....efgh....fgh.....gh......h.......
     * 28: abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h.......
     * The result is left in the high bits of the word.
     */
    tcg_gen_shli_i64(t, d, 7);
    tcg_gen_or_i64(d, d, t);
    tcg_gen_shli_i64(t, d, 14);
    tcg_gen_or_i64(d, d, t);
    tcg_gen_shli_i64(t, d, 28);
    tcg_gen_or_i64(d, d, t);
}

static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, MO_8, 0x80);

    /* See above */
    tcg_gen_and_vec(vece, d, s, m);
    tcg_gen_shli_vec(vece, t, d, 7);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_gen_shli_vec(vece, t, d, 14);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_gen_shli_vec(vece, t, d, 28);
    tcg_gen_or_vec(vece, d, d, t);
}

#ifdef TARGET_X86_64
#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
#else
#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
#endif

static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2 g = {
        .fni8 = gen_pmovmskb_i64,
        .fniv = gen_pmovmskb_vec,
        .opt_opc = vecop_list,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64
    };
    MemOp ot = decode->op[2].ot;
    int vec_len = vector_len(s, decode);
    TCGv t = tcg_temp_new();

    tcg_gen_gvec_2(offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), decode->op[2].offset,
                   vec_len, vec_len, &g);
    tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
    while (vec_len > 8) {
        vec_len -= 8;
        if (TCG_TARGET_HAS_extract2_tl) {
            /*
             * Load the next byte of the result into the high byte of T.
             * TCG does a similar expansion of deposit to shl+extract2; by
             * loading the whole word, the shift left is avoided.
             */
#ifdef TARGET_X86_64
            tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_Q((vec_len - 1) / 8)));
#else
            tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L((vec_len - 1) / 4)));
#endif

            tcg_gen_extract2_tl(s->T0, t, s->T0, TARGET_LONG_BITS - 8);
        } else {
            /*
             * The _previous_ value is deposited into bits 8 and higher of t.  Because
             * those bits are known to be zero after ld8u, this becomes a shift+or
             * if deposit is not available.
             */
            tcg_gen_ld8u_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
            tcg_gen_deposit_tl(s->T0, t, s->T0, 8, TARGET_LONG_BITS - 8);
        }
    }
}

static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm);
}

static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 16) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shri(MO_16,
                          decode->op[0].offset, decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 16) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shli(MO_16,
                          decode->op[0].offset, decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 16) {
        decode->immediate = 15;
    }
    tcg_gen_gvec_sari(MO_16,
                      decode->op[0].offset, decode->op[1].offset,
                      decode->immediate, vec_len, vec_len);
}

static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 32) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shri(MO_32,
                          decode->op[0].offset, decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 32) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shli(MO_32,
                          decode->op[0].offset, decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 32) {
        decode->immediate = 31;
    }
    tcg_gen_gvec_sari(MO_32,
                      decode->op[0].offset, decode->op[1].offset,
                      decode->immediate, vec_len, vec_len);
}

static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 64) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shri(MO_64,
                          decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    if (decode->immediate >= 64) {
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else {
        tcg_gen_gvec_shli(MO_64,
                          decode->op[0].offset, decode->op[1].offset,
                          decode->immediate, vec_len, vec_len);
    }
}

static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len)
{
    MemOp ot = vec_len == 16 ? MO_128 : MO_256;
    TCGv_i32 imm_v = tcg_constant8u_i32(imm);
    TCGv_ptr ptr = tcg_temp_new_ptr();

    tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_t0) + xmm_offset(ot),
                         vec_len, vec_len, 0);

    tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_t0));
    tcg_gen_st_i32(imm_v, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
    return ptr;
}

static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);
    TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);

    if (s->vex_l) {
        gen_helper_psrldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec);
    } else {
        gen_helper_psrldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec);
    }
}

static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);
    TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);

    if (s->vex_l) {
        gen_helper_pslldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec);
    } else {
        gen_helper_pslldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec);
    }
}

static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int mask = ot == MO_64 ? 63 : 31;
    int b = decode->immediate & mask;

    switch (ot) {
    case MO_32:
#ifdef TARGET_X86_64
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b);
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;

    case MO_64:
#endif
        tcg_gen_rotri_tl(s->T0, s->T0, b);
        break;

    default:
        g_assert_not_reached();
    }
}

static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int mask;

    mask = ot == MO_64 ?
              63 : 31;
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}

static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    switch(decode->immediate & 3) {
    case 0:
        gen_helper_sha1rnds4_f0(OP_PTR0, OP_PTR0, OP_PTR1);
        break;
    case 1:
        gen_helper_sha1rnds4_f1(OP_PTR0, OP_PTR0, OP_PTR1);
        break;
    case 2:
        gen_helper_sha1rnds4_f2(OP_PTR0, OP_PTR0, OP_PTR1);
        break;
    case 3:
        gen_helper_sha1rnds4_f3(OP_PTR0, OP_PTR0, OP_PTR1);
        break;
    }
}

static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 wk0 = tcg_temp_new_i32();
    TCGv_i32 wk1 = tcg_temp_new_i32();

    tcg_gen_ld_i32(wk0, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(0)));
    tcg_gen_ld_i32(wk1, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(1)));

    gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
}

static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int mask;

    mask = ot == MO_64 ? 63 : 31;
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
}

static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int mask;

    mask = ot == MO_64 ?
              63 : 31;
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}

static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    assert(!s->vex_l);
    gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm);
}

static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_update_mxcsr(tcg_env);
    tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
}

static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    assert(!s->vex_l);
    gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2);
}

/*
 * 00 = v*ps Vps, Hps, Wpd
 * 66 = v*pd Vpd, Hpd, Wps
 * f3 = v*ss Vss, Hss, Wps
 * f2 = v*sd Vsd, Hsd, Wps
 */
#define SSE_CMP(x) { \
    gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \
    gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, \
    gen_helper_ ## x ## ps ## _ymm, gen_helper_ ## x ## pd ## _ymm}
static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] = {
    SSE_CMP(cmpeq),
    SSE_CMP(cmplt),
    SSE_CMP(cmple),
    SSE_CMP(cmpunord),
    SSE_CMP(cmpneq),
    SSE_CMP(cmpnlt),
    SSE_CMP(cmpnle),
    SSE_CMP(cmpord),

    SSE_CMP(cmpequ),
    SSE_CMP(cmpnge),
    SSE_CMP(cmpngt),
    SSE_CMP(cmpfalse),
    SSE_CMP(cmpnequ),
    SSE_CMP(cmpge),
    SSE_CMP(cmpgt),
    SSE_CMP(cmptrue),

    SSE_CMP(cmpeqs),
    SSE_CMP(cmpltq),
    SSE_CMP(cmpleq),
    SSE_CMP(cmpunords),
    SSE_CMP(cmpneqq),
    SSE_CMP(cmpnltq),
    SSE_CMP(cmpnleq),
    SSE_CMP(cmpords),

    SSE_CMP(cmpequs),
    SSE_CMP(cmpngeq),
    SSE_CMP(cmpngtq),
    SSE_CMP(cmpfalses),
    SSE_CMP(cmpnequs),
    SSE_CMP(cmpgeq),
    SSE_CMP(cmpgtq),
    SSE_CMP(cmptrues),
};
#undef SSE_CMP

static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7);
    int b =
        s->prefix & PREFIX_REPZ ? 2 /* ss */ :
        s->prefix & PREFIX_REPNZ ? 3 /* sd */ :
        !!(s->prefix & PREFIX_DATA) /* pd */ + (s->vex_l << 2);

    gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    SSEFunc_0_epp fn;
    fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss;
    fn(tcg_env, OP_PTR1, OP_PTR2);
    set_cc_op(s, CC_OP_EFLAGS);
}

static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    if (s->vex_l) {
        gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        gen_helper_cvtpd2ps_xmm(tcg_env, OP_PTR0, OP_PTR2);
    }
}

static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    if (s->vex_l) {
        gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2);
    } else {
        gen_helper_cvtps2pd_xmm(tcg_env, OP_PTR0, OP_PTR2);
    }
}

static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_unary_imm_fp_sse(s, env, decode,
                         gen_helper_cvtps2ph_xmm,
                         gen_helper_cvtps2ph_ymm);
    /*
     * VCVTPS2PH is the only instruction that performs an operation on a
     * register source and then *stores* into memory.
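     * Hence the explicit gen_store_sse() call below for the case in which
     * op[0] is a memory operand.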
     */
    if (decode->op[0].has_ea) {
        gen_store_sse(s, decode, decode->op[0].offset);
    }
}

static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);
    TCGv_i32 in;

    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);

#ifdef TARGET_X86_64
    MemOp ot = decode->op[2].ot;
    if (ot == MO_64) {
        if (s->prefix & PREFIX_REPNZ) {
            gen_helper_cvtsq2sd(tcg_env, OP_PTR0, s->T1);
        } else {
            gen_helper_cvtsq2ss(tcg_env, OP_PTR0, s->T1);
        }
        return;
    }
    in = s->tmp2_i32;
    tcg_gen_trunc_tl_i32(in, s->T1);
#else
    in = s->T1;
#endif

    if (s->prefix & PREFIX_REPNZ) {
        gen_helper_cvtsi2sd(tcg_env, OP_PTR0, in);
    } else {
        gen_helper_cvtsi2ss(tcg_env, OP_PTR0, in);
    }
}

static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                                  SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq,
                                  SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq)
{
    TCGv_i32 out;

#ifdef TARGET_X86_64
    MemOp ot = decode->op[0].ot;
    if (ot == MO_64) {
        if (s->prefix & PREFIX_REPNZ) {
            sd2sq(s->T0, tcg_env, OP_PTR2);
        } else {
            ss2sq(s->T0, tcg_env, OP_PTR2);
        }
        return;
    }

    out = s->tmp2_i32;
#else
    out = s->T0;
#endif
    if (s->prefix & PREFIX_REPNZ) {
        sd2si(out, tcg_env, OP_PTR2);
    } else {
        ss2si(out, tcg_env, OP_PTR2);
    }
#ifdef TARGET_X86_64
    tcg_gen_extu_i32_tl(s->T0, out);
#endif
}

#ifndef TARGET_X86_64
#define gen_helper_cvtss2sq NULL
#define gen_helper_cvtsd2sq NULL
#define gen_helper_cvttss2sq NULL
#define gen_helper_cvttsd2sq NULL
#endif

static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_VCVTtSx2SI(s, env, decode,
                   gen_helper_cvtss2si, gen_helper_cvtss2sq,
                   gen_helper_cvtsd2si, gen_helper_cvtsd2sq);
}

static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_VCVTtSx2SI(s, env, decode,
                   gen_helper_cvttss2si, gen_helper_cvttss2sq,
                   gen_helper_cvttsd2si, gen_helper_cvttsd2sq);
}

static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int mask = decode->immediate & 1;
    int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask);
    if (decode->op[0].has_ea) {
        /* VEX-only instruction, no alignment requirements.
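         * This is also why aligned == false is passed to gen_sto_env_A0()
         * below.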
         */
        gen_sto_env_A0(s, src_ofs, false);
    } else {
        tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, 16, 16);
    }
}

static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_pextr(s, env, decode, MO_32);
}

static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int val = decode->immediate;
    int dest_word = (val >> 4) & 3;
    int new_mask = (val & 15) | (1 << dest_word);
    int vec_len = 16;

    assert(!s->vex_l);

    if (new_mask == 15) {
        /* All zeroes except possibly for the inserted element */
        tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    } else if (decode->op[1].offset != decode->op[0].offset) {
        gen_store_sse(s, decode, decode->op[1].offset);
    }

    if (new_mask != (val & 15)) {
        tcg_gen_st_i32(s->tmp2_i32, tcg_env,
                       vector_elem_offset(&decode->op[0], MO_32, dest_word));
    }

    if (new_mask != 15) {
        TCGv_i32 zero = tcg_constant_i32(0); /* float32_zero */
        int i;
        for (i = 0; i < 4; i++) {
            if ((val >> i) & 1) {
                tcg_gen_st_i32(zero, tcg_env,
                               vector_elem_offset(&decode->op[0], MO_32, i));
            }
        }
    }
}

static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int val = decode->immediate;
    tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
                   vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3));
    gen_vinsertps(s, env, decode);
}

static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
    gen_vinsertps(s, env, decode);
}

static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int mask = decode->immediate & 1;
    tcg_gen_gvec_mov(MO_64,
                     decode->op[0].offset + offsetof(YMMReg, YMM_X(mask)),
                     decode->op[2].offset + offsetof(YMMReg, YMM_X(0)), 16, 16);
    tcg_gen_gvec_mov(MO_64,
                     decode->op[0].offset + offsetof(YMMReg, YMM_X(!mask)),
                     decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16);
}

static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                               SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm)
{
    if (!s->vex_l) {
        xmm(tcg_env, OP_PTR2, OP_PTR1, s->A0);
    } else {
        ymm(tcg_env, OP_PTR2, OP_PTR1, s->A0);
    }
}

static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
}

static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
}

static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
    if (decode->op[0].offset != decode->op[1].offset) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0)));
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
    }
}

static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
}

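/*
 * Register form of MOVHPx: the high quadword comes from the second source
 * operand and the low quadword from the first; copies whose source and
 * destination offsets already match are skipped.
 */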
static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    if (decode->op[0].offset != decode->op[2].offset) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
    }
    if (decode->op[0].offset != decode->op[1].offset) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0)));
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
    }
}

static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
    tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
    if (decode->op[0].offset != decode->op[1].offset) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(1)));
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
    }
}

static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset);
    tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
    if (decode->op[0].offset != decode->op[1].offset) {
        tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0)));
        tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
    }
}

/*
 * Note that MOVLPx supports 256-bit operation unlike MOVHLPx, MOVLHPx, MOVHPx.
 * Use a gvec move to move everything above the bottom 64 bits.
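 * The bottom quadword of the second source is read into a temporary first,
 * so that it survives the full-width copy and can then be written into the
 * destination's low 64 bits.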
 */

static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);
    tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
}

static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);
    tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
}

static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
}

static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
    tcg_gen_st_i64(zero, OP_PTR0, offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
}

static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);
    tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}

static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int vec_len = vector_len(s, decode);

    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
    tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
    tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}

static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
}

static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    if (s->vex_w) {
        gen_VMASKMOVPD_st(s, env, decode);
    } else {
        gen_VMASKMOVPS_st(s, env, decode);
    }
}

static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    assert(s->vex_l);
    gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2);
}

static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    assert(s->vex_l);
    gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm);
}

static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    assert(!s->vex_l);
    gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2);
}

static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    assert(!s->vex_l);
    gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
}

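/* Single-precision counterpart of gen_VROUNDSD above; 128-bit only, hence the assert. */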
static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
    assert(!s->vex_l);
    gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
}

static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_i32 imm = tcg_constant_i32(decode->immediate);
    SSEFunc_0_pppi ps, pd, fn;
    ps = s->vex_l ? gen_helper_shufps_ymm : gen_helper_shufps_xmm;
    pd = s->vex_l ? gen_helper_shufpd_ymm : gen_helper_shufpd_xmm;
    fn = s->prefix & PREFIX_DATA ? pd : ps;
    fn(OP_PTR0, OP_PTR1, OP_PTR2, imm);
}

static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    SSEFunc_0_epp fn;
    fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss;
    fn(tcg_env, OP_PTR1, OP_PTR2);
    set_cc_op(s, CC_OP_EFLAGS);
}

static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    TCGv_ptr ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_regs));
    gen_helper_memset(ptr, ptr, tcg_constant_i32(0),
                      tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg)));
}

static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    int i;

    for (i = 0; i < CPU_NB_REGS; i++) {
        int offset = offsetof(CPUX86State, xmm_regs[i].ZMM_X(1));
        tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0);
    }
}