/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
23139c1837SPaolo Bonzini */ 24139c1837SPaolo Bonzini 25b1ee3c67SRichard Henderson#include "../tcg-ldst.c.inc" 26139c1837SPaolo Bonzini#include "../tcg-pool.c.inc" 27139c1837SPaolo Bonzini 28139c1837SPaolo Bonzini#ifdef CONFIG_DEBUG_TCG 29139c1837SPaolo Bonzinistatic const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 30139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 31139c1837SPaolo Bonzini "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 32139c1837SPaolo Bonzini#else 33139c1837SPaolo Bonzini "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 34139c1837SPaolo Bonzini#endif 35139c1837SPaolo Bonzini "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", 36139c1837SPaolo Bonzini "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", 37139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 38139c1837SPaolo Bonzini "%xmm8", "%xmm9", "%xmm10", "%xmm11", 39139c1837SPaolo Bonzini "%xmm12", "%xmm13", "%xmm14", "%xmm15", 40139c1837SPaolo Bonzini#endif 41139c1837SPaolo Bonzini}; 42139c1837SPaolo Bonzini#endif 43139c1837SPaolo Bonzini 44139c1837SPaolo Bonzinistatic const int tcg_target_reg_alloc_order[] = { 45139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 46139c1837SPaolo Bonzini TCG_REG_RBP, 47139c1837SPaolo Bonzini TCG_REG_RBX, 48139c1837SPaolo Bonzini TCG_REG_R12, 49139c1837SPaolo Bonzini TCG_REG_R13, 50139c1837SPaolo Bonzini TCG_REG_R14, 51139c1837SPaolo Bonzini TCG_REG_R15, 52139c1837SPaolo Bonzini TCG_REG_R10, 53139c1837SPaolo Bonzini TCG_REG_R11, 54139c1837SPaolo Bonzini TCG_REG_R9, 55139c1837SPaolo Bonzini TCG_REG_R8, 56139c1837SPaolo Bonzini TCG_REG_RCX, 57139c1837SPaolo Bonzini TCG_REG_RDX, 58139c1837SPaolo Bonzini TCG_REG_RSI, 59139c1837SPaolo Bonzini TCG_REG_RDI, 60139c1837SPaolo Bonzini TCG_REG_RAX, 61139c1837SPaolo Bonzini#else 62139c1837SPaolo Bonzini TCG_REG_EBX, 63139c1837SPaolo Bonzini TCG_REG_ESI, 64139c1837SPaolo Bonzini TCG_REG_EDI, 65139c1837SPaolo Bonzini TCG_REG_EBP, 66139c1837SPaolo 
Bonzini TCG_REG_ECX, 67139c1837SPaolo Bonzini TCG_REG_EDX, 68139c1837SPaolo Bonzini TCG_REG_EAX, 69139c1837SPaolo Bonzini#endif 70139c1837SPaolo Bonzini TCG_REG_XMM0, 71139c1837SPaolo Bonzini TCG_REG_XMM1, 72139c1837SPaolo Bonzini TCG_REG_XMM2, 73139c1837SPaolo Bonzini TCG_REG_XMM3, 74139c1837SPaolo Bonzini TCG_REG_XMM4, 75139c1837SPaolo Bonzini TCG_REG_XMM5, 76139c1837SPaolo Bonzini#ifndef _WIN64 77139c1837SPaolo Bonzini /* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save 78139c1837SPaolo Bonzini any of them. Therefore only allow xmm0-xmm5 to be allocated. */ 79139c1837SPaolo Bonzini TCG_REG_XMM6, 80139c1837SPaolo Bonzini TCG_REG_XMM7, 81139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 82139c1837SPaolo Bonzini TCG_REG_XMM8, 83139c1837SPaolo Bonzini TCG_REG_XMM9, 84139c1837SPaolo Bonzini TCG_REG_XMM10, 85139c1837SPaolo Bonzini TCG_REG_XMM11, 86139c1837SPaolo Bonzini TCG_REG_XMM12, 87139c1837SPaolo Bonzini TCG_REG_XMM13, 88139c1837SPaolo Bonzini TCG_REG_XMM14, 89139c1837SPaolo Bonzini TCG_REG_XMM15, 90139c1837SPaolo Bonzini#endif 91139c1837SPaolo Bonzini#endif 92139c1837SPaolo Bonzini}; 93139c1837SPaolo Bonzini 94098d0fc1SRichard Henderson#define TCG_TMP_VEC TCG_REG_XMM5 95098d0fc1SRichard Henderson 96139c1837SPaolo Bonzinistatic const int tcg_target_call_iarg_regs[] = { 97139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 98139c1837SPaolo Bonzini#if defined(_WIN64) 99139c1837SPaolo Bonzini TCG_REG_RCX, 100139c1837SPaolo Bonzini TCG_REG_RDX, 101139c1837SPaolo Bonzini#else 102139c1837SPaolo Bonzini TCG_REG_RDI, 103139c1837SPaolo Bonzini TCG_REG_RSI, 104139c1837SPaolo Bonzini TCG_REG_RDX, 105139c1837SPaolo Bonzini TCG_REG_RCX, 106139c1837SPaolo Bonzini#endif 107139c1837SPaolo Bonzini TCG_REG_R8, 108139c1837SPaolo Bonzini TCG_REG_R9, 109139c1837SPaolo Bonzini#else 110139c1837SPaolo Bonzini /* 32 bit mode uses stack based calling convention (GCC default). 
*/ 111139c1837SPaolo Bonzini#endif 112139c1837SPaolo Bonzini}; 113139c1837SPaolo Bonzini 1145e3d0c19SRichard Hendersonstatic TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 1155e3d0c19SRichard Henderson{ 1165e3d0c19SRichard Henderson switch (kind) { 1175e3d0c19SRichard Henderson case TCG_CALL_RET_NORMAL: 1185e3d0c19SRichard Henderson tcg_debug_assert(slot >= 0 && slot <= 1); 1195e3d0c19SRichard Henderson return slot ? TCG_REG_EDX : TCG_REG_EAX; 120c4f4a00aSRichard Henderson#ifdef _WIN64 121c4f4a00aSRichard Henderson case TCG_CALL_RET_BY_VEC: 122c4f4a00aSRichard Henderson tcg_debug_assert(slot == 0); 123c4f4a00aSRichard Henderson return TCG_REG_XMM0; 124c4f4a00aSRichard Henderson#endif 1255e3d0c19SRichard Henderson default: 1265e3d0c19SRichard Henderson g_assert_not_reached(); 1275e3d0c19SRichard Henderson } 1285e3d0c19SRichard Henderson} 129139c1837SPaolo Bonzini 130139c1837SPaolo Bonzini/* Constants we accept. */ 131139c1837SPaolo Bonzini#define TCG_CT_CONST_S32 0x100 132139c1837SPaolo Bonzini#define TCG_CT_CONST_U32 0x200 133139c1837SPaolo Bonzini#define TCG_CT_CONST_I32 0x400 134139c1837SPaolo Bonzini#define TCG_CT_CONST_WSZ 0x800 135d3d1c30cSRichard Henderson#define TCG_CT_CONST_TST 0x1000 136d8387f0eSRichard Henderson#define TCG_CT_CONST_ZERO 0x2000 137139c1837SPaolo Bonzini 138139c1837SPaolo Bonzini/* Registers used with L constraint, which are the first argument 139139c1837SPaolo Bonzini registers on x86_64, and two random call clobbered registers on 140139c1837SPaolo Bonzini i386. 
*/ 141139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 142139c1837SPaolo Bonzini# define TCG_REG_L0 tcg_target_call_iarg_regs[0] 143139c1837SPaolo Bonzini# define TCG_REG_L1 tcg_target_call_iarg_regs[1] 144139c1837SPaolo Bonzini#else 145139c1837SPaolo Bonzini# define TCG_REG_L0 TCG_REG_EAX 146139c1837SPaolo Bonzini# define TCG_REG_L1 TCG_REG_EDX 147139c1837SPaolo Bonzini#endif 148139c1837SPaolo Bonzini 149df903b94SRichard Henderson#if TCG_TARGET_REG_BITS == 64 150df903b94SRichard Henderson# define ALL_GENERAL_REGS 0x0000ffffu 151df903b94SRichard Henderson# define ALL_VECTOR_REGS 0xffff0000u 152df903b94SRichard Henderson# define ALL_BYTEL_REGS ALL_GENERAL_REGS 153df903b94SRichard Henderson#else 154df903b94SRichard Henderson# define ALL_GENERAL_REGS 0x000000ffu 155df903b94SRichard Henderson# define ALL_VECTOR_REGS 0x00ff0000u 15636df88c0SRichard Henderson# define ALL_BYTEL_REGS 0x0000000fu 157df903b94SRichard Henderson#endif 158915e1d52SRichard Henderson#define SOFTMMU_RESERVE_REGS \ 159915e1d52SRichard Henderson (tcg_use_softmmu ? 
(1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0) 160df903b94SRichard Henderson 161dbedadbaSRichard Henderson#define have_bmi2 (cpuinfo & CPUINFO_BMI2) 162dbedadbaSRichard Henderson#define have_lzcnt (cpuinfo & CPUINFO_LZCNT) 163139c1837SPaolo Bonzini 164705ed477SRichard Hendersonstatic const tcg_insn_unit *tb_ret_addr; 165139c1837SPaolo Bonzini 166139c1837SPaolo Bonzinistatic bool patch_reloc(tcg_insn_unit *code_ptr, int type, 167139c1837SPaolo Bonzini intptr_t value, intptr_t addend) 168139c1837SPaolo Bonzini{ 169139c1837SPaolo Bonzini value += addend; 170139c1837SPaolo Bonzini switch(type) { 171139c1837SPaolo Bonzini case R_386_PC32: 172705ed477SRichard Henderson value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr); 173139c1837SPaolo Bonzini if (value != (int32_t)value) { 174139c1837SPaolo Bonzini return false; 175139c1837SPaolo Bonzini } 176139c1837SPaolo Bonzini /* FALLTHRU */ 177139c1837SPaolo Bonzini case R_386_32: 178139c1837SPaolo Bonzini tcg_patch32(code_ptr, value); 179139c1837SPaolo Bonzini break; 180139c1837SPaolo Bonzini case R_386_PC8: 181705ed477SRichard Henderson value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr); 182139c1837SPaolo Bonzini if (value != (int8_t)value) { 183139c1837SPaolo Bonzini return false; 184139c1837SPaolo Bonzini } 185139c1837SPaolo Bonzini tcg_patch8(code_ptr, value); 186139c1837SPaolo Bonzini break; 187139c1837SPaolo Bonzini default: 188732e89f4SRichard Henderson g_assert_not_reached(); 189139c1837SPaolo Bonzini } 190139c1837SPaolo Bonzini return true; 191139c1837SPaolo Bonzini} 192139c1837SPaolo Bonzini 193139c1837SPaolo Bonzini/* test if a constant matches the constraint */ 19421e9a8aeSRichard Hendersonstatic bool tcg_target_const_match(int64_t val, int ct, 19521e9a8aeSRichard Henderson TCGType type, TCGCond cond, int vece) 196139c1837SPaolo Bonzini{ 197139c1837SPaolo Bonzini if (ct & TCG_CT_CONST) { 198139c1837SPaolo Bonzini return 1; 199139c1837SPaolo Bonzini } 200c7c778b5SRichard Henderson if (type == TCG_TYPE_I32) { 
201d3d1c30cSRichard Henderson if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | 202d3d1c30cSRichard Henderson TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) { 203c7c778b5SRichard Henderson return 1; 204c7c778b5SRichard Henderson } 205c7c778b5SRichard Henderson } else { 206139c1837SPaolo Bonzini if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { 207139c1837SPaolo Bonzini return 1; 208139c1837SPaolo Bonzini } 209139c1837SPaolo Bonzini if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { 210139c1837SPaolo Bonzini return 1; 211139c1837SPaolo Bonzini } 212139c1837SPaolo Bonzini if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { 213139c1837SPaolo Bonzini return 1; 214139c1837SPaolo Bonzini } 215d3d1c30cSRichard Henderson /* 216d3d1c30cSRichard Henderson * This will be used in combination with TCG_CT_CONST_S32, 217d3d1c30cSRichard Henderson * so "normal" TESTQ is already matched. Also accept: 218d3d1c30cSRichard Henderson * TESTQ -> TESTL (uint32_t) 219d3d1c30cSRichard Henderson * TESTQ -> BT (is_power_of_2) 220d3d1c30cSRichard Henderson */ 221d3d1c30cSRichard Henderson if ((ct & TCG_CT_CONST_TST) 222d3d1c30cSRichard Henderson && is_tst_cond(cond) 223d3d1c30cSRichard Henderson && (val == (uint32_t)val || is_power_of_2(val))) { 224d3d1c30cSRichard Henderson return 1; 225d3d1c30cSRichard Henderson } 226c7c778b5SRichard Henderson } 227139c1837SPaolo Bonzini if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 
32 : 64)) { 228139c1837SPaolo Bonzini return 1; 229139c1837SPaolo Bonzini } 230d8387f0eSRichard Henderson if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 231d8387f0eSRichard Henderson return 1; 232d8387f0eSRichard Henderson } 233139c1837SPaolo Bonzini return 0; 234139c1837SPaolo Bonzini} 235139c1837SPaolo Bonzini 236139c1837SPaolo Bonzini# define LOWREGMASK(x) ((x) & 7) 237139c1837SPaolo Bonzini 238139c1837SPaolo Bonzini#define P_EXT 0x100 /* 0x0f opcode prefix */ 239139c1837SPaolo Bonzini#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ 240139c1837SPaolo Bonzini#define P_DATA16 0x400 /* 0x66 opcode prefix */ 241fc88a523SRichard Henderson#define P_VEXW 0x1000 /* Set VEX.W = 1 */ 242139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 243fc88a523SRichard Henderson# define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */ 244139c1837SPaolo Bonzini# define P_REXB_R 0x2000 /* REG field as byte register */ 245139c1837SPaolo Bonzini# define P_REXB_RM 0x4000 /* R/M field as byte register */ 246139c1837SPaolo Bonzini# define P_GS 0x8000 /* gs segment override */ 247139c1837SPaolo Bonzini#else 248139c1837SPaolo Bonzini# define P_REXW 0 249139c1837SPaolo Bonzini# define P_REXB_R 0 250139c1837SPaolo Bonzini# define P_REXB_RM 0 251139c1837SPaolo Bonzini# define P_GS 0 252139c1837SPaolo Bonzini#endif 253139c1837SPaolo Bonzini#define P_EXT3A 0x10000 /* 0x0f 0x3a opcode prefix */ 254139c1837SPaolo Bonzini#define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */ 255139c1837SPaolo Bonzini#define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */ 256139c1837SPaolo Bonzini#define P_VEXL 0x80000 /* Set VEX.L = 1 */ 25708b032f7SRichard Henderson#define P_EVEX 0x100000 /* Requires EVEX encoding */ 258139c1837SPaolo Bonzini 259afa37be4SPaolo Bonzini#define OPC_ARITH_EbIb (0x80) 260139c1837SPaolo Bonzini#define OPC_ARITH_EvIz (0x81) 261139c1837SPaolo Bonzini#define OPC_ARITH_EvIb (0x83) 262139c1837SPaolo Bonzini#define OPC_ARITH_GvEv (0x03) /* ... 
plus (ARITH_FOO << 3) */ 263139c1837SPaolo Bonzini#define OPC_ANDN (0xf2 | P_EXT38) 264139c1837SPaolo Bonzini#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) 265139c1837SPaolo Bonzini#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3)) 266139c1837SPaolo Bonzini#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16) 267139c1837SPaolo Bonzini#define OPC_BSF (0xbc | P_EXT) 268139c1837SPaolo Bonzini#define OPC_BSR (0xbd | P_EXT) 269139c1837SPaolo Bonzini#define OPC_BSWAP (0xc8 | P_EXT) 270139c1837SPaolo Bonzini#define OPC_CALL_Jz (0xe8) 271139c1837SPaolo Bonzini#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ 272139c1837SPaolo Bonzini#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) 273139c1837SPaolo Bonzini#define OPC_DEC_r32 (0x48) 274139c1837SPaolo Bonzini#define OPC_IMUL_GvEv (0xaf | P_EXT) 275139c1837SPaolo Bonzini#define OPC_IMUL_GvEvIb (0x6b) 276139c1837SPaolo Bonzini#define OPC_IMUL_GvEvIz (0x69) 277139c1837SPaolo Bonzini#define OPC_INC_r32 (0x40) 278139c1837SPaolo Bonzini#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ 279139c1837SPaolo Bonzini#define OPC_JCC_short (0x70) /* ... 
plus condition code */ 280139c1837SPaolo Bonzini#define OPC_JMP_long (0xe9) 281139c1837SPaolo Bonzini#define OPC_JMP_short (0xeb) 282139c1837SPaolo Bonzini#define OPC_LEA (0x8d) 283139c1837SPaolo Bonzini#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) 284139c1837SPaolo Bonzini#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ 285139c1837SPaolo Bonzini#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ 286139c1837SPaolo Bonzini#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ 287139c1837SPaolo Bonzini#define OPC_MOVB_EvIz (0xc6) 288139c1837SPaolo Bonzini#define OPC_MOVL_EvIz (0xc7) 28973f97f0aSRichard Henderson#define OPC_MOVB_Ib (0xb0) 290139c1837SPaolo Bonzini#define OPC_MOVL_Iv (0xb8) 291139c1837SPaolo Bonzini#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) 292139c1837SPaolo Bonzini#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) 293139c1837SPaolo Bonzini#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) 294139c1837SPaolo Bonzini#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) 295139c1837SPaolo Bonzini#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) 296139c1837SPaolo Bonzini#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) 297139c1837SPaolo Bonzini#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) 298139c1837SPaolo Bonzini#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) 299139c1837SPaolo Bonzini#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3) 300139c1837SPaolo Bonzini#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3) 301139c1837SPaolo Bonzini#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16) 302139c1837SPaolo Bonzini#define OPC_MOVSBL (0xbe | P_EXT) 303139c1837SPaolo Bonzini#define OPC_MOVSWL (0xbf | P_EXT) 304139c1837SPaolo Bonzini#define OPC_MOVSLQ (0x63 | P_REXW) 305139c1837SPaolo Bonzini#define OPC_MOVZBL (0xb6 | P_EXT) 306139c1837SPaolo Bonzini#define OPC_MOVZWL (0xb7 | P_EXT) 307139c1837SPaolo Bonzini#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) 308139c1837SPaolo Bonzini#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) 309139c1837SPaolo Bonzini#define OPC_PABSD (0x1e | P_EXT38 | 
P_DATA16) 310dac1648fSRichard Henderson#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 311139c1837SPaolo Bonzini#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) 312139c1837SPaolo Bonzini#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) 313139c1837SPaolo Bonzini#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) 314139c1837SPaolo Bonzini#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16) 315139c1837SPaolo Bonzini#define OPC_PADDB (0xfc | P_EXT | P_DATA16) 316139c1837SPaolo Bonzini#define OPC_PADDW (0xfd | P_EXT | P_DATA16) 317139c1837SPaolo Bonzini#define OPC_PADDD (0xfe | P_EXT | P_DATA16) 318139c1837SPaolo Bonzini#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16) 319139c1837SPaolo Bonzini#define OPC_PADDSB (0xec | P_EXT | P_DATA16) 320139c1837SPaolo Bonzini#define OPC_PADDSW (0xed | P_EXT | P_DATA16) 321139c1837SPaolo Bonzini#define OPC_PADDUB (0xdc | P_EXT | P_DATA16) 322139c1837SPaolo Bonzini#define OPC_PADDUW (0xdd | P_EXT | P_DATA16) 323139c1837SPaolo Bonzini#define OPC_PAND (0xdb | P_EXT | P_DATA16) 324139c1837SPaolo Bonzini#define OPC_PANDN (0xdf | P_EXT | P_DATA16) 325139c1837SPaolo Bonzini#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16) 326139c1837SPaolo Bonzini#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16) 327139c1837SPaolo Bonzini#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16) 328139c1837SPaolo Bonzini#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16) 329139c1837SPaolo Bonzini#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16) 330139c1837SPaolo Bonzini#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16) 331139c1837SPaolo Bonzini#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) 332139c1837SPaolo Bonzini#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) 333139c1837SPaolo Bonzini#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) 334098d0fc1SRichard Henderson#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) 335098d0fc1SRichard Henderson#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) 336139c1837SPaolo Bonzini#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) 337139c1837SPaolo Bonzini#define OPC_PMAXSW 
(0xee | P_EXT | P_DATA16) 338139c1837SPaolo Bonzini#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) 339dac1648fSRichard Henderson#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 340139c1837SPaolo Bonzini#define OPC_PMAXUB (0xde | P_EXT | P_DATA16) 341139c1837SPaolo Bonzini#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16) 342139c1837SPaolo Bonzini#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16) 343dac1648fSRichard Henderson#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 344139c1837SPaolo Bonzini#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16) 345139c1837SPaolo Bonzini#define OPC_PMINSW (0xea | P_EXT | P_DATA16) 346139c1837SPaolo Bonzini#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16) 347dac1648fSRichard Henderson#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 348139c1837SPaolo Bonzini#define OPC_PMINUB (0xda | P_EXT | P_DATA16) 349139c1837SPaolo Bonzini#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16) 350139c1837SPaolo Bonzini#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16) 351dac1648fSRichard Henderson#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 352139c1837SPaolo Bonzini#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16) 353139c1837SPaolo Bonzini#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16) 354139c1837SPaolo Bonzini#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16) 355139c1837SPaolo Bonzini#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16) 356139c1837SPaolo Bonzini#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16) 357139c1837SPaolo Bonzini#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16) 358139c1837SPaolo Bonzini#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16) 359139c1837SPaolo Bonzini#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16) 3604c8b9686SRichard Henderson#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 361139c1837SPaolo Bonzini#define OPC_POR (0xeb | P_EXT | P_DATA16) 362139c1837SPaolo Bonzini#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16) 363139c1837SPaolo Bonzini#define OPC_PSHUFD (0x70 | P_EXT | 
P_DATA16) 364139c1837SPaolo Bonzini#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) 365139c1837SPaolo Bonzini#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) 366139c1837SPaolo Bonzini#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ 3674e73f842SRichard Henderson#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ 368139c1837SPaolo Bonzini#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ 369139c1837SPaolo Bonzini#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) 370139c1837SPaolo Bonzini#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) 371139c1837SPaolo Bonzini#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) 372139c1837SPaolo Bonzini#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) 373139c1837SPaolo Bonzini#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) 3743cc18d18SRichard Henderson#define OPC_VPSRAQ (0xe2 | P_EXT | P_DATA16 | P_VEXW | P_EVEX) 375139c1837SPaolo Bonzini#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) 376139c1837SPaolo Bonzini#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) 377139c1837SPaolo Bonzini#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) 378139c1837SPaolo Bonzini#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) 379139c1837SPaolo Bonzini#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) 380139c1837SPaolo Bonzini#define OPC_PSUBD (0xfa | P_EXT | P_DATA16) 381139c1837SPaolo Bonzini#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16) 382139c1837SPaolo Bonzini#define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16) 383139c1837SPaolo Bonzini#define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16) 384139c1837SPaolo Bonzini#define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16) 385139c1837SPaolo Bonzini#define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16) 386139c1837SPaolo Bonzini#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16) 387139c1837SPaolo Bonzini#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16) 388139c1837SPaolo Bonzini#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16) 389139c1837SPaolo Bonzini#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16) 390139c1837SPaolo Bonzini#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16) 
/*
 * Opcode table, continued.  The low byte is the opcode; the P_* flags
 * ORed in select the prefix and escape bytes.
 */
#define OPC_PUNPCKHWD   (0x69 | P_EXT | P_DATA16)
#define OPC_PUNPCKHDQ   (0x6a | P_EXT | P_DATA16)
#define OPC_PUNPCKHQDQ  (0x6d | P_EXT | P_DATA16)
#define OPC_PXOR        (0xef | P_EXT | P_DATA16)
#define OPC_POP_r32     (0x58)
#define OPC_POPCNT      (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHUFPS      (0xc6 | P_EXT)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_SHRD_Ib     (0xac | P_EXT)
#define OPC_TESTB       (0x84)
#define OPC_TESTL       (0x85)
#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
#define OPC_UD2         (0x0b | P_EXT)
#define OPC_VPBLENDD    (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB   (0x4c | P_EXT3A | P_DATA16)
#define OPC_VPBLENDMB   (0x66 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPBLENDMW   (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPBLENDMD   (0x64 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPBLENDMQ   (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPB      (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPUB     (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPW      (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPUW     (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPD      (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPUD     (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPQ      (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPUQ     (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPINSRB     (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW     (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPMOVM2B    (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPMOVM2W    (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPMOVM2D    (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPMOVM2Q    (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPERMQ      (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128  (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPROLVD     (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPROLVQ     (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
444102cd35cSRichard Henderson#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX) 445102cd35cSRichard Henderson#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 446965d5d06SRichard Henderson#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) 447965d5d06SRichard Henderson#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX) 448965d5d06SRichard Henderson#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) 449965d5d06SRichard Henderson#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 450965d5d06SRichard Henderson#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX) 451965d5d06SRichard Henderson#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 452965d5d06SRichard Henderson#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 453965d5d06SRichard Henderson#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX) 454965d5d06SRichard Henderson#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 455ef77ce0dSRichard Henderson#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 456139c1837SPaolo Bonzini#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) 457fc88a523SRichard Henderson#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) 458ef77ce0dSRichard Henderson#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 459139c1837SPaolo Bonzini#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) 460ef77ce0dSRichard Henderson#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 461ef77ce0dSRichard Henderson#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 462139c1837SPaolo Bonzini#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) 463fc88a523SRichard Henderson#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) 4643143767bSRichard Henderson#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) 465*782cffa4SRichard Henderson#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX) 466*782cffa4SRichard 
Henderson#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 467*782cffa4SRichard Henderson#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX) 468*782cffa4SRichard Henderson#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) 469*782cffa4SRichard Henderson#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX) 470*782cffa4SRichard Henderson#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) 471*782cffa4SRichard Henderson#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX) 472*782cffa4SRichard Henderson#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) 473139c1837SPaolo Bonzini#define OPC_VZEROUPPER (0x77 | P_EXT) 474139c1837SPaolo Bonzini#define OPC_XCHG_ax_r32 (0x90) 475767c2503SRichard Henderson#define OPC_XCHG_EvGv (0x87) 476139c1837SPaolo Bonzini 477b1ee3c67SRichard Henderson#define OPC_GRP3_Eb (0xf6) 478139c1837SPaolo Bonzini#define OPC_GRP3_Ev (0xf7) 479139c1837SPaolo Bonzini#define OPC_GRP5 (0xff) 480139c1837SPaolo Bonzini#define OPC_GRP14 (0x73 | P_EXT | P_DATA16) 481d3d1c30cSRichard Henderson#define OPC_GRPBT (0xba | P_EXT) 482d3d1c30cSRichard Henderson 483d3d1c30cSRichard Henderson#define OPC_GRPBT_BT 4 484d3d1c30cSRichard Henderson#define OPC_GRPBT_BTS 5 485d3d1c30cSRichard Henderson#define OPC_GRPBT_BTR 6 486d3d1c30cSRichard Henderson#define OPC_GRPBT_BTC 7 487139c1837SPaolo Bonzini 488139c1837SPaolo Bonzini/* Group 1 opcode extensions for 0x80-0x83. 489139c1837SPaolo Bonzini These are also used as modifiers for OPC_ARITH. 
*/ 490139c1837SPaolo Bonzini#define ARITH_ADD 0 491139c1837SPaolo Bonzini#define ARITH_OR 1 492139c1837SPaolo Bonzini#define ARITH_ADC 2 493139c1837SPaolo Bonzini#define ARITH_SBB 3 494139c1837SPaolo Bonzini#define ARITH_AND 4 495139c1837SPaolo Bonzini#define ARITH_SUB 5 496139c1837SPaolo Bonzini#define ARITH_XOR 6 497139c1837SPaolo Bonzini#define ARITH_CMP 7 498139c1837SPaolo Bonzini 499139c1837SPaolo Bonzini/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ 500139c1837SPaolo Bonzini#define SHIFT_ROL 0 501139c1837SPaolo Bonzini#define SHIFT_ROR 1 502139c1837SPaolo Bonzini#define SHIFT_SHL 4 503139c1837SPaolo Bonzini#define SHIFT_SHR 5 504139c1837SPaolo Bonzini#define SHIFT_SAR 7 505139c1837SPaolo Bonzini 506139c1837SPaolo Bonzini/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ 507b1ee3c67SRichard Henderson#define EXT3_TESTi 0 508139c1837SPaolo Bonzini#define EXT3_NOT 2 509139c1837SPaolo Bonzini#define EXT3_NEG 3 510139c1837SPaolo Bonzini#define EXT3_MUL 4 511139c1837SPaolo Bonzini#define EXT3_IMUL 5 512139c1837SPaolo Bonzini#define EXT3_DIV 6 513139c1837SPaolo Bonzini#define EXT3_IDIV 7 514139c1837SPaolo Bonzini 515139c1837SPaolo Bonzini/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ 516139c1837SPaolo Bonzini#define EXT5_INC_Ev 0 517139c1837SPaolo Bonzini#define EXT5_DEC_Ev 1 518139c1837SPaolo Bonzini#define EXT5_CALLN_Ev 2 519139c1837SPaolo Bonzini#define EXT5_JMPN_Ev 4 520139c1837SPaolo Bonzini 521139c1837SPaolo Bonzini/* Condition codes to be added to OPC_JCC_{long,short}. 
*/ 522139c1837SPaolo Bonzini#define JCC_JMP (-1) 523139c1837SPaolo Bonzini#define JCC_JO 0x0 524139c1837SPaolo Bonzini#define JCC_JNO 0x1 525139c1837SPaolo Bonzini#define JCC_JB 0x2 526139c1837SPaolo Bonzini#define JCC_JAE 0x3 527139c1837SPaolo Bonzini#define JCC_JE 0x4 528139c1837SPaolo Bonzini#define JCC_JNE 0x5 529139c1837SPaolo Bonzini#define JCC_JBE 0x6 530139c1837SPaolo Bonzini#define JCC_JA 0x7 531139c1837SPaolo Bonzini#define JCC_JS 0x8 532139c1837SPaolo Bonzini#define JCC_JNS 0x9 533139c1837SPaolo Bonzini#define JCC_JP 0xa 534139c1837SPaolo Bonzini#define JCC_JNP 0xb 535139c1837SPaolo Bonzini#define JCC_JL 0xc 536139c1837SPaolo Bonzini#define JCC_JGE 0xd 537139c1837SPaolo Bonzini#define JCC_JLE 0xe 538139c1837SPaolo Bonzini#define JCC_JG 0xf 539139c1837SPaolo Bonzini 540139c1837SPaolo Bonzinistatic const uint8_t tcg_cond_to_jcc[] = { 541139c1837SPaolo Bonzini [TCG_COND_EQ] = JCC_JE, 542139c1837SPaolo Bonzini [TCG_COND_NE] = JCC_JNE, 543139c1837SPaolo Bonzini [TCG_COND_LT] = JCC_JL, 544139c1837SPaolo Bonzini [TCG_COND_GE] = JCC_JGE, 545139c1837SPaolo Bonzini [TCG_COND_LE] = JCC_JLE, 546139c1837SPaolo Bonzini [TCG_COND_GT] = JCC_JG, 547139c1837SPaolo Bonzini [TCG_COND_LTU] = JCC_JB, 548139c1837SPaolo Bonzini [TCG_COND_GEU] = JCC_JAE, 549139c1837SPaolo Bonzini [TCG_COND_LEU] = JCC_JBE, 550139c1837SPaolo Bonzini [TCG_COND_GTU] = JCC_JA, 551303214aaSRichard Henderson [TCG_COND_TSTEQ] = JCC_JE, 552303214aaSRichard Henderson [TCG_COND_TSTNE] = JCC_JNE, 553139c1837SPaolo Bonzini}; 554139c1837SPaolo Bonzini 555139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 556139c1837SPaolo Bonzinistatic void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) 557139c1837SPaolo Bonzini{ 558139c1837SPaolo Bonzini int rex; 559139c1837SPaolo Bonzini 560139c1837SPaolo Bonzini if (opc & P_GS) { 561139c1837SPaolo Bonzini tcg_out8(s, 0x65); 562139c1837SPaolo Bonzini } 563139c1837SPaolo Bonzini if (opc & P_DATA16) { 564139c1837SPaolo Bonzini /* We should never be asking for both 
16 and 64-bit operation. */ 565139c1837SPaolo Bonzini tcg_debug_assert((opc & P_REXW) == 0); 566139c1837SPaolo Bonzini tcg_out8(s, 0x66); 567139c1837SPaolo Bonzini } 568139c1837SPaolo Bonzini if (opc & P_SIMDF3) { 569139c1837SPaolo Bonzini tcg_out8(s, 0xf3); 570139c1837SPaolo Bonzini } else if (opc & P_SIMDF2) { 571139c1837SPaolo Bonzini tcg_out8(s, 0xf2); 572139c1837SPaolo Bonzini } 573139c1837SPaolo Bonzini 574139c1837SPaolo Bonzini rex = 0; 575139c1837SPaolo Bonzini rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ 576139c1837SPaolo Bonzini rex |= (r & 8) >> 1; /* REX.R */ 577139c1837SPaolo Bonzini rex |= (x & 8) >> 2; /* REX.X */ 578139c1837SPaolo Bonzini rex |= (rm & 8) >> 3; /* REX.B */ 579139c1837SPaolo Bonzini 580139c1837SPaolo Bonzini /* P_REXB_{R,RM} indicates that the given register is the low byte. 581139c1837SPaolo Bonzini For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, 582139c1837SPaolo Bonzini as otherwise the encoding indicates %[abcd]h. Note that the values 583139c1837SPaolo Bonzini that are ORed in merely indicate that the REX byte must be present; 584139c1837SPaolo Bonzini those bits get discarded in output. */ 585139c1837SPaolo Bonzini rex |= opc & (r >= 4 ? P_REXB_R : 0); 586139c1837SPaolo Bonzini rex |= opc & (rm >= 4 ? 
P_REXB_RM : 0); 587139c1837SPaolo Bonzini 588139c1837SPaolo Bonzini if (rex) { 589139c1837SPaolo Bonzini tcg_out8(s, (uint8_t)(rex | 0x40)); 590139c1837SPaolo Bonzini } 591139c1837SPaolo Bonzini 592139c1837SPaolo Bonzini if (opc & (P_EXT | P_EXT38 | P_EXT3A)) { 593139c1837SPaolo Bonzini tcg_out8(s, 0x0f); 594139c1837SPaolo Bonzini if (opc & P_EXT38) { 595139c1837SPaolo Bonzini tcg_out8(s, 0x38); 596139c1837SPaolo Bonzini } else if (opc & P_EXT3A) { 597139c1837SPaolo Bonzini tcg_out8(s, 0x3a); 598139c1837SPaolo Bonzini } 599139c1837SPaolo Bonzini } 600139c1837SPaolo Bonzini 601139c1837SPaolo Bonzini tcg_out8(s, opc); 602139c1837SPaolo Bonzini} 603139c1837SPaolo Bonzini#else 604139c1837SPaolo Bonzinistatic void tcg_out_opc(TCGContext *s, int opc) 605139c1837SPaolo Bonzini{ 606139c1837SPaolo Bonzini if (opc & P_DATA16) { 607139c1837SPaolo Bonzini tcg_out8(s, 0x66); 608139c1837SPaolo Bonzini } 609139c1837SPaolo Bonzini if (opc & P_SIMDF3) { 610139c1837SPaolo Bonzini tcg_out8(s, 0xf3); 611139c1837SPaolo Bonzini } else if (opc & P_SIMDF2) { 612139c1837SPaolo Bonzini tcg_out8(s, 0xf2); 613139c1837SPaolo Bonzini } 614139c1837SPaolo Bonzini if (opc & (P_EXT | P_EXT38 | P_EXT3A)) { 615139c1837SPaolo Bonzini tcg_out8(s, 0x0f); 616139c1837SPaolo Bonzini if (opc & P_EXT38) { 617139c1837SPaolo Bonzini tcg_out8(s, 0x38); 618139c1837SPaolo Bonzini } else if (opc & P_EXT3A) { 619139c1837SPaolo Bonzini tcg_out8(s, 0x3a); 620139c1837SPaolo Bonzini } 621139c1837SPaolo Bonzini } 622139c1837SPaolo Bonzini tcg_out8(s, opc); 623139c1837SPaolo Bonzini} 624139c1837SPaolo Bonzini/* Discard the register arguments to tcg_out_opc early, so as not to penalize 625139c1837SPaolo Bonzini the 32-bit compilation paths. This method works with all versions of gcc, 626139c1837SPaolo Bonzini whereas relying on optimization may not be able to exclude them. 
*/ 627139c1837SPaolo Bonzini#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) 628139c1837SPaolo Bonzini#endif 629139c1837SPaolo Bonzini 630139c1837SPaolo Bonzinistatic void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) 631139c1837SPaolo Bonzini{ 632139c1837SPaolo Bonzini tcg_out_opc(s, opc, r, rm, 0); 633139c1837SPaolo Bonzini tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); 634139c1837SPaolo Bonzini} 635139c1837SPaolo Bonzini 636139c1837SPaolo Bonzinistatic void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, 637139c1837SPaolo Bonzini int rm, int index) 638139c1837SPaolo Bonzini{ 639139c1837SPaolo Bonzini int tmp; 640139c1837SPaolo Bonzini 641d3b41127SRichard Henderson if (opc & P_GS) { 642d3b41127SRichard Henderson tcg_out8(s, 0x65); 643d3b41127SRichard Henderson } 644139c1837SPaolo Bonzini /* Use the two byte form if possible, which cannot encode 645139c1837SPaolo Bonzini VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */ 646fc88a523SRichard Henderson if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT 647139c1837SPaolo Bonzini && ((rm | index) & 8) == 0) { 648139c1837SPaolo Bonzini /* Two byte VEX prefix. */ 649139c1837SPaolo Bonzini tcg_out8(s, 0xc5); 650139c1837SPaolo Bonzini 651139c1837SPaolo Bonzini tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ 652139c1837SPaolo Bonzini } else { 653139c1837SPaolo Bonzini /* Three byte VEX prefix. */ 654139c1837SPaolo Bonzini tcg_out8(s, 0xc4); 655139c1837SPaolo Bonzini 656139c1837SPaolo Bonzini /* VEX.m-mmmm */ 657139c1837SPaolo Bonzini if (opc & P_EXT3A) { 658139c1837SPaolo Bonzini tmp = 3; 659139c1837SPaolo Bonzini } else if (opc & P_EXT38) { 660139c1837SPaolo Bonzini tmp = 2; 661139c1837SPaolo Bonzini } else if (opc & P_EXT) { 662139c1837SPaolo Bonzini tmp = 1; 663139c1837SPaolo Bonzini } else { 664139c1837SPaolo Bonzini g_assert_not_reached(); 665139c1837SPaolo Bonzini } 666139c1837SPaolo Bonzini tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ 667139c1837SPaolo Bonzini tmp |= (index & 8 ? 
0 : 0x40); /* VEX.X */ 668139c1837SPaolo Bonzini tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ 669139c1837SPaolo Bonzini tcg_out8(s, tmp); 670139c1837SPaolo Bonzini 671fc88a523SRichard Henderson tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */ 672139c1837SPaolo Bonzini } 673139c1837SPaolo Bonzini 674139c1837SPaolo Bonzini tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */ 675139c1837SPaolo Bonzini /* VEX.pp */ 676139c1837SPaolo Bonzini if (opc & P_DATA16) { 677139c1837SPaolo Bonzini tmp |= 1; /* 0x66 */ 678139c1837SPaolo Bonzini } else if (opc & P_SIMDF3) { 679139c1837SPaolo Bonzini tmp |= 2; /* 0xf3 */ 680139c1837SPaolo Bonzini } else if (opc & P_SIMDF2) { 681139c1837SPaolo Bonzini tmp |= 3; /* 0xf2 */ 682139c1837SPaolo Bonzini } 683139c1837SPaolo Bonzini tmp |= (~v & 15) << 3; /* VEX.vvvv */ 684139c1837SPaolo Bonzini tcg_out8(s, tmp); 685139c1837SPaolo Bonzini tcg_out8(s, opc); 686139c1837SPaolo Bonzini} 687139c1837SPaolo Bonzini 68808b032f7SRichard Hendersonstatic void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, 689c044ec0dSRichard Henderson int rm, int index, int aaa, bool z) 69008b032f7SRichard Henderson{ 69108b032f7SRichard Henderson /* The entire 4-byte evex prefix; with R' and V' set. 
*/ 69208b032f7SRichard Henderson uint32_t p = 0x08041062; 69308b032f7SRichard Henderson int mm, pp; 69408b032f7SRichard Henderson 69508b032f7SRichard Henderson tcg_debug_assert(have_avx512vl); 69608b032f7SRichard Henderson 69708b032f7SRichard Henderson /* EVEX.mm */ 69808b032f7SRichard Henderson if (opc & P_EXT3A) { 69908b032f7SRichard Henderson mm = 3; 70008b032f7SRichard Henderson } else if (opc & P_EXT38) { 70108b032f7SRichard Henderson mm = 2; 70208b032f7SRichard Henderson } else if (opc & P_EXT) { 70308b032f7SRichard Henderson mm = 1; 70408b032f7SRichard Henderson } else { 70508b032f7SRichard Henderson g_assert_not_reached(); 70608b032f7SRichard Henderson } 70708b032f7SRichard Henderson 70808b032f7SRichard Henderson /* EVEX.pp */ 70908b032f7SRichard Henderson if (opc & P_DATA16) { 71008b032f7SRichard Henderson pp = 1; /* 0x66 */ 71108b032f7SRichard Henderson } else if (opc & P_SIMDF3) { 71208b032f7SRichard Henderson pp = 2; /* 0xf3 */ 71308b032f7SRichard Henderson } else if (opc & P_SIMDF2) { 71408b032f7SRichard Henderson pp = 3; /* 0xf2 */ 71508b032f7SRichard Henderson } else { 71608b032f7SRichard Henderson pp = 0; 71708b032f7SRichard Henderson } 71808b032f7SRichard Henderson 71908b032f7SRichard Henderson p = deposit32(p, 8, 2, mm); 72008b032f7SRichard Henderson p = deposit32(p, 13, 1, (rm & 8) == 0); /* EVEX.RXB.B */ 72108b032f7SRichard Henderson p = deposit32(p, 14, 1, (index & 8) == 0); /* EVEX.RXB.X */ 72208b032f7SRichard Henderson p = deposit32(p, 15, 1, (r & 8) == 0); /* EVEX.RXB.R */ 72308b032f7SRichard Henderson p = deposit32(p, 16, 2, pp); 72408b032f7SRichard Henderson p = deposit32(p, 19, 4, ~v); 72508b032f7SRichard Henderson p = deposit32(p, 23, 1, (opc & P_VEXW) != 0); 726c044ec0dSRichard Henderson p = deposit32(p, 24, 3, aaa); 72708b032f7SRichard Henderson p = deposit32(p, 29, 2, (opc & P_VEXL) != 0); 728c044ec0dSRichard Henderson p = deposit32(p, 31, 1, z); 72908b032f7SRichard Henderson 73008b032f7SRichard Henderson tcg_out32(s, p); 
73108b032f7SRichard Henderson tcg_out8(s, opc); 73208b032f7SRichard Henderson} 73308b032f7SRichard Henderson 734139c1837SPaolo Bonzinistatic void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) 735139c1837SPaolo Bonzini{ 73608b032f7SRichard Henderson if (opc & P_EVEX) { 737c044ec0dSRichard Henderson tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false); 73808b032f7SRichard Henderson } else { 739139c1837SPaolo Bonzini tcg_out_vex_opc(s, opc, r, v, rm, 0); 74008b032f7SRichard Henderson } 741139c1837SPaolo Bonzini tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); 742139c1837SPaolo Bonzini} 743139c1837SPaolo Bonzini 744bc97b3adSRichard Hendersonstatic void tcg_out_vex_modrm_type(TCGContext *s, int opc, 745bc97b3adSRichard Henderson int r, int v, int rm, TCGType type) 746bc97b3adSRichard Henderson{ 747bc97b3adSRichard Henderson if (type == TCG_TYPE_V256) { 748bc97b3adSRichard Henderson opc |= P_VEXL; 749bc97b3adSRichard Henderson } 750bc97b3adSRichard Henderson tcg_out_vex_modrm(s, opc, r, v, rm); 751bc97b3adSRichard Henderson} 752bc97b3adSRichard Henderson 753d5896749SRichard Hendersonstatic void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v, 754d5896749SRichard Henderson int rm, int aaa, bool z, TCGType type) 755d5896749SRichard Henderson{ 756d5896749SRichard Henderson if (type == TCG_TYPE_V256) { 757d5896749SRichard Henderson opc |= P_VEXL; 758d5896749SRichard Henderson } 759d5896749SRichard Henderson tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z); 760d5896749SRichard Henderson tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); 761d5896749SRichard Henderson} 762d5896749SRichard Henderson 763139c1837SPaolo Bonzini/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. 764139c1837SPaolo Bonzini We handle either RM and INDEX missing with a negative value. 
In 64-bit 765139c1837SPaolo Bonzini mode for absolute addresses, ~RM is the size of the immediate operand 766139c1837SPaolo Bonzini that will follow the instruction. */ 767139c1837SPaolo Bonzini 768139c1837SPaolo Bonzinistatic void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index, 769139c1837SPaolo Bonzini int shift, intptr_t offset) 770139c1837SPaolo Bonzini{ 771139c1837SPaolo Bonzini int mod, len; 772139c1837SPaolo Bonzini 773139c1837SPaolo Bonzini if (index < 0 && rm < 0) { 774139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 775139c1837SPaolo Bonzini /* Try for a rip-relative addressing mode. This has replaced 776139c1837SPaolo Bonzini the 32-bit-mode absolute addressing encoding. */ 777139c1837SPaolo Bonzini intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm; 778139c1837SPaolo Bonzini intptr_t disp = offset - pc; 779139c1837SPaolo Bonzini if (disp == (int32_t)disp) { 780139c1837SPaolo Bonzini tcg_out8(s, (LOWREGMASK(r) << 3) | 5); 781139c1837SPaolo Bonzini tcg_out32(s, disp); 782139c1837SPaolo Bonzini return; 783139c1837SPaolo Bonzini } 784139c1837SPaolo Bonzini 785139c1837SPaolo Bonzini /* Try for an absolute address encoding. This requires the 786139c1837SPaolo Bonzini use of the MODRM+SIB encoding and is therefore larger than 787139c1837SPaolo Bonzini rip-relative addressing. */ 788139c1837SPaolo Bonzini if (offset == (int32_t)offset) { 789139c1837SPaolo Bonzini tcg_out8(s, (LOWREGMASK(r) << 3) | 4); 790139c1837SPaolo Bonzini tcg_out8(s, (4 << 3) | 5); 791139c1837SPaolo Bonzini tcg_out32(s, offset); 792139c1837SPaolo Bonzini return; 793139c1837SPaolo Bonzini } 794139c1837SPaolo Bonzini 795139c1837SPaolo Bonzini /* ??? The memory isn't directly addressable. */ 796139c1837SPaolo Bonzini g_assert_not_reached(); 797139c1837SPaolo Bonzini } else { 798139c1837SPaolo Bonzini /* Absolute address. 
*/ 799139c1837SPaolo Bonzini tcg_out8(s, (r << 3) | 5); 800139c1837SPaolo Bonzini tcg_out32(s, offset); 801139c1837SPaolo Bonzini return; 802139c1837SPaolo Bonzini } 803139c1837SPaolo Bonzini } 804139c1837SPaolo Bonzini 805139c1837SPaolo Bonzini /* Find the length of the immediate addend. Note that the encoding 806139c1837SPaolo Bonzini that would be used for (%ebp) indicates absolute addressing. */ 807139c1837SPaolo Bonzini if (rm < 0) { 808139c1837SPaolo Bonzini mod = 0, len = 4, rm = 5; 809139c1837SPaolo Bonzini } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { 810139c1837SPaolo Bonzini mod = 0, len = 0; 811139c1837SPaolo Bonzini } else if (offset == (int8_t)offset) { 812139c1837SPaolo Bonzini mod = 0x40, len = 1; 813139c1837SPaolo Bonzini } else { 814139c1837SPaolo Bonzini mod = 0x80, len = 4; 815139c1837SPaolo Bonzini } 816139c1837SPaolo Bonzini 817139c1837SPaolo Bonzini /* Use a single byte MODRM format if possible. Note that the encoding 818139c1837SPaolo Bonzini that would be used for %esp is the escape to the two byte form. */ 819139c1837SPaolo Bonzini if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { 820139c1837SPaolo Bonzini /* Single byte MODRM format. */ 821139c1837SPaolo Bonzini tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); 822139c1837SPaolo Bonzini } else { 823139c1837SPaolo Bonzini /* Two byte MODRM+SIB format. */ 824139c1837SPaolo Bonzini 825139c1837SPaolo Bonzini /* Note that the encoding that would place %esp into the index 826139c1837SPaolo Bonzini field indicates no index register. In 64-bit mode, the REX.X 827139c1837SPaolo Bonzini bit counts, so %r12 can be used as the index. 
*/ 828139c1837SPaolo Bonzini if (index < 0) { 829139c1837SPaolo Bonzini index = 4; 830139c1837SPaolo Bonzini } else { 831139c1837SPaolo Bonzini tcg_debug_assert(index != TCG_REG_ESP); 832139c1837SPaolo Bonzini } 833139c1837SPaolo Bonzini 834139c1837SPaolo Bonzini tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); 835139c1837SPaolo Bonzini tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); 836139c1837SPaolo Bonzini } 837139c1837SPaolo Bonzini 838139c1837SPaolo Bonzini if (len == 1) { 839139c1837SPaolo Bonzini tcg_out8(s, offset); 840139c1837SPaolo Bonzini } else if (len == 4) { 841139c1837SPaolo Bonzini tcg_out32(s, offset); 842139c1837SPaolo Bonzini } 843139c1837SPaolo Bonzini} 844139c1837SPaolo Bonzini 845139c1837SPaolo Bonzinistatic void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, 846139c1837SPaolo Bonzini int index, int shift, intptr_t offset) 847139c1837SPaolo Bonzini{ 848139c1837SPaolo Bonzini tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index); 849139c1837SPaolo Bonzini tcg_out_sib_offset(s, r, rm, index, shift, offset); 850139c1837SPaolo Bonzini} 851139c1837SPaolo Bonzini 852139c1837SPaolo Bonzinistatic void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v, 853139c1837SPaolo Bonzini int rm, int index, int shift, 854139c1837SPaolo Bonzini intptr_t offset) 855139c1837SPaolo Bonzini{ 856139c1837SPaolo Bonzini tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index); 857139c1837SPaolo Bonzini tcg_out_sib_offset(s, r, rm, index, shift, offset); 858139c1837SPaolo Bonzini} 859139c1837SPaolo Bonzini 860139c1837SPaolo Bonzini/* A simplification of the above with no index or shift. 
*/ 861139c1837SPaolo Bonzinistatic inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, 862139c1837SPaolo Bonzini int rm, intptr_t offset) 863139c1837SPaolo Bonzini{ 864139c1837SPaolo Bonzini tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); 865139c1837SPaolo Bonzini} 866139c1837SPaolo Bonzini 867139c1837SPaolo Bonzinistatic inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r, 868139c1837SPaolo Bonzini int v, int rm, intptr_t offset) 869139c1837SPaolo Bonzini{ 870139c1837SPaolo Bonzini tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset); 871139c1837SPaolo Bonzini} 872139c1837SPaolo Bonzini 873139c1837SPaolo Bonzini/* Output an opcode with an expected reference to the constant pool. */ 874139c1837SPaolo Bonzinistatic inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r) 875139c1837SPaolo Bonzini{ 876139c1837SPaolo Bonzini tcg_out_opc(s, opc, r, 0, 0); 877139c1837SPaolo Bonzini /* Absolute for 32-bit, pc-relative for 64-bit. */ 878139c1837SPaolo Bonzini tcg_out8(s, LOWREGMASK(r) << 3 | 5); 879139c1837SPaolo Bonzini tcg_out32(s, 0); 880139c1837SPaolo Bonzini} 881139c1837SPaolo Bonzini 882139c1837SPaolo Bonzini/* Output an opcode with an expected reference to the constant pool. */ 883139c1837SPaolo Bonzinistatic inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r) 884139c1837SPaolo Bonzini{ 885139c1837SPaolo Bonzini tcg_out_vex_opc(s, opc, r, 0, 0, 0); 886139c1837SPaolo Bonzini /* Absolute for 32-bit, pc-relative for 64-bit. */ 887139c1837SPaolo Bonzini tcg_out8(s, LOWREGMASK(r) << 3 | 5); 888139c1837SPaolo Bonzini tcg_out32(s, 0); 889139c1837SPaolo Bonzini} 890139c1837SPaolo Bonzini 891139c1837SPaolo Bonzini/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ 892139c1837SPaolo Bonzinistatic inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) 893139c1837SPaolo Bonzini{ 894139c1837SPaolo Bonzini /* Propagate an opcode prefix, such as P_REXW. 
*/ 895139c1837SPaolo Bonzini int ext = subop & ~0x7; 896139c1837SPaolo Bonzini subop &= 0x7; 897139c1837SPaolo Bonzini 898139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); 899139c1837SPaolo Bonzini} 900139c1837SPaolo Bonzini 901139c1837SPaolo Bonzinistatic bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 902139c1837SPaolo Bonzini{ 903139c1837SPaolo Bonzini int rexw = 0; 904139c1837SPaolo Bonzini 905139c1837SPaolo Bonzini if (arg == ret) { 906139c1837SPaolo Bonzini return true; 907139c1837SPaolo Bonzini } 908139c1837SPaolo Bonzini switch (type) { 909139c1837SPaolo Bonzini case TCG_TYPE_I64: 910139c1837SPaolo Bonzini rexw = P_REXW; 911139c1837SPaolo Bonzini /* fallthru */ 912139c1837SPaolo Bonzini case TCG_TYPE_I32: 913139c1837SPaolo Bonzini if (ret < 16) { 914139c1837SPaolo Bonzini if (arg < 16) { 915139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg); 916139c1837SPaolo Bonzini } else { 917139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret); 918139c1837SPaolo Bonzini } 919139c1837SPaolo Bonzini } else { 920139c1837SPaolo Bonzini if (arg < 16) { 921139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg); 922139c1837SPaolo Bonzini } else { 923139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg); 924139c1837SPaolo Bonzini } 925139c1837SPaolo Bonzini } 926139c1837SPaolo Bonzini break; 927139c1837SPaolo Bonzini 928139c1837SPaolo Bonzini case TCG_TYPE_V64: 929139c1837SPaolo Bonzini tcg_debug_assert(ret >= 16 && arg >= 16); 930139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg); 931139c1837SPaolo Bonzini break; 932139c1837SPaolo Bonzini case TCG_TYPE_V128: 933139c1837SPaolo Bonzini tcg_debug_assert(ret >= 16 && arg >= 16); 934139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg); 935139c1837SPaolo Bonzini break; 936139c1837SPaolo Bonzini case TCG_TYPE_V256: 937139c1837SPaolo Bonzini 
tcg_debug_assert(ret >= 16 && arg >= 16); 938139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg); 939139c1837SPaolo Bonzini break; 940139c1837SPaolo Bonzini 941139c1837SPaolo Bonzini default: 942139c1837SPaolo Bonzini g_assert_not_reached(); 943139c1837SPaolo Bonzini } 944139c1837SPaolo Bonzini return true; 945139c1837SPaolo Bonzini} 946139c1837SPaolo Bonzini 947139c1837SPaolo Bonzinistatic const int avx2_dup_insn[4] = { 948139c1837SPaolo Bonzini OPC_VPBROADCASTB, OPC_VPBROADCASTW, 949139c1837SPaolo Bonzini OPC_VPBROADCASTD, OPC_VPBROADCASTQ, 950139c1837SPaolo Bonzini}; 951139c1837SPaolo Bonzini 952139c1837SPaolo Bonzinistatic bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 953139c1837SPaolo Bonzini TCGReg r, TCGReg a) 954139c1837SPaolo Bonzini{ 955139c1837SPaolo Bonzini if (have_avx2) { 956bc97b3adSRichard Henderson tcg_out_vex_modrm_type(s, avx2_dup_insn[vece], r, 0, a, type); 957139c1837SPaolo Bonzini } else { 958139c1837SPaolo Bonzini switch (vece) { 959139c1837SPaolo Bonzini case MO_8: 960139c1837SPaolo Bonzini /* ??? With zero in a register, use PSHUFB. */ 961139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a); 962139c1837SPaolo Bonzini a = r; 963139c1837SPaolo Bonzini /* FALLTHRU */ 964139c1837SPaolo Bonzini case MO_16: 965139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a); 966139c1837SPaolo Bonzini a = r; 967139c1837SPaolo Bonzini /* FALLTHRU */ 968139c1837SPaolo Bonzini case MO_32: 969139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a); 970139c1837SPaolo Bonzini /* imm8 operand: all output lanes selected from input lane 0. 
*/ 971139c1837SPaolo Bonzini tcg_out8(s, 0); 972139c1837SPaolo Bonzini break; 973139c1837SPaolo Bonzini case MO_64: 974139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a); 975139c1837SPaolo Bonzini break; 976139c1837SPaolo Bonzini default: 977139c1837SPaolo Bonzini g_assert_not_reached(); 978139c1837SPaolo Bonzini } 979139c1837SPaolo Bonzini } 980139c1837SPaolo Bonzini return true; 981139c1837SPaolo Bonzini} 982139c1837SPaolo Bonzini 983139c1837SPaolo Bonzinistatic bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 984139c1837SPaolo Bonzini TCGReg r, TCGReg base, intptr_t offset) 985139c1837SPaolo Bonzini{ 986139c1837SPaolo Bonzini if (have_avx2) { 987139c1837SPaolo Bonzini int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); 988139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l, 989139c1837SPaolo Bonzini r, 0, base, offset); 990139c1837SPaolo Bonzini } else { 991139c1837SPaolo Bonzini switch (vece) { 992139c1837SPaolo Bonzini case MO_64: 993139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset); 994139c1837SPaolo Bonzini break; 995139c1837SPaolo Bonzini case MO_32: 996139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); 997139c1837SPaolo Bonzini break; 998139c1837SPaolo Bonzini case MO_16: 999139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset); 1000139c1837SPaolo Bonzini tcg_out8(s, 0); /* imm8 */ 1001139c1837SPaolo Bonzini tcg_out_dup_vec(s, type, vece, r, r); 1002139c1837SPaolo Bonzini break; 1003139c1837SPaolo Bonzini case MO_8: 1004139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset); 1005139c1837SPaolo Bonzini tcg_out8(s, 0); /* imm8 */ 1006139c1837SPaolo Bonzini tcg_out_dup_vec(s, type, vece, r, r); 1007139c1837SPaolo Bonzini break; 1008139c1837SPaolo Bonzini default: 1009139c1837SPaolo Bonzini g_assert_not_reached(); 1010139c1837SPaolo Bonzini } 1011139c1837SPaolo Bonzini } 
1012139c1837SPaolo Bonzini return true; 1013139c1837SPaolo Bonzini} 1014139c1837SPaolo Bonzini 10154e186175SRichard Hendersonstatic void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 10164e186175SRichard Henderson TCGReg ret, int64_t arg) 1017139c1837SPaolo Bonzini{ 1018139c1837SPaolo Bonzini int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); 1019139c1837SPaolo Bonzini 1020139c1837SPaolo Bonzini if (arg == 0) { 1021139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret); 1022139c1837SPaolo Bonzini return; 1023139c1837SPaolo Bonzini } 1024139c1837SPaolo Bonzini if (arg == -1) { 1025139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret); 1026139c1837SPaolo Bonzini return; 1027139c1837SPaolo Bonzini } 1028139c1837SPaolo Bonzini 10294e186175SRichard Henderson if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) { 10304e186175SRichard Henderson if (have_avx2) { 10314e186175SRichard Henderson tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); 10324e186175SRichard Henderson } else { 10334e186175SRichard Henderson tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); 10344e186175SRichard Henderson } 10354e186175SRichard Henderson new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); 10364e186175SRichard Henderson } else { 1037139c1837SPaolo Bonzini if (type == TCG_TYPE_V64) { 1038139c1837SPaolo Bonzini tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret); 1039139c1837SPaolo Bonzini } else if (have_avx2) { 1040139c1837SPaolo Bonzini tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); 1041139c1837SPaolo Bonzini } else { 1042139c1837SPaolo Bonzini tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); 1043139c1837SPaolo Bonzini } 10444e186175SRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 1045139c1837SPaolo Bonzini new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); 1046139c1837SPaolo Bonzini } else { 10474e186175SRichard Henderson new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32); 1048139c1837SPaolo Bonzini } 
1049139c1837SPaolo Bonzini } 1050139c1837SPaolo Bonzini} 1051139c1837SPaolo Bonzini 10520a6a8bc8SRichard Hendersonstatic void tcg_out_movi_vec(TCGContext *s, TCGType type, 10530a6a8bc8SRichard Henderson TCGReg ret, tcg_target_long arg) 10540a6a8bc8SRichard Henderson{ 10550a6a8bc8SRichard Henderson if (arg == 0) { 10560a6a8bc8SRichard Henderson tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret); 10570a6a8bc8SRichard Henderson return; 10580a6a8bc8SRichard Henderson } 10590a6a8bc8SRichard Henderson if (arg == -1) { 10600a6a8bc8SRichard Henderson tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret); 10610a6a8bc8SRichard Henderson return; 10620a6a8bc8SRichard Henderson } 10630a6a8bc8SRichard Henderson 10640a6a8bc8SRichard Henderson int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW); 10650a6a8bc8SRichard Henderson tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret); 10660a6a8bc8SRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 10670a6a8bc8SRichard Henderson new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); 10680a6a8bc8SRichard Henderson } else { 10690a6a8bc8SRichard Henderson new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); 10700a6a8bc8SRichard Henderson } 10710a6a8bc8SRichard Henderson} 10720a6a8bc8SRichard Henderson 10730a6a8bc8SRichard Hendersonstatic void tcg_out_movi_int(TCGContext *s, TCGType type, 1074139c1837SPaolo Bonzini TCGReg ret, tcg_target_long arg) 1075139c1837SPaolo Bonzini{ 1076139c1837SPaolo Bonzini tcg_target_long diff; 1077139c1837SPaolo Bonzini 1078139c1837SPaolo Bonzini if (arg == 0) { 1079139c1837SPaolo Bonzini tgen_arithr(s, ARITH_XOR, ret, ret); 1080139c1837SPaolo Bonzini return; 1081139c1837SPaolo Bonzini } 1082139c1837SPaolo Bonzini if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { 1083139c1837SPaolo Bonzini tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); 1084139c1837SPaolo Bonzini tcg_out32(s, arg); 1085139c1837SPaolo Bonzini return; 1086139c1837SPaolo Bonzini } 1087139c1837SPaolo Bonzini if (arg == (int32_t)arg) { 
1088139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); 1089139c1837SPaolo Bonzini tcg_out32(s, arg); 1090139c1837SPaolo Bonzini return; 1091139c1837SPaolo Bonzini } 1092139c1837SPaolo Bonzini 1093139c1837SPaolo Bonzini /* Try a 7 byte pc-relative lea before the 10 byte movq. */ 1094705ed477SRichard Henderson diff = tcg_pcrel_diff(s, (const void *)arg) - 7; 1095139c1837SPaolo Bonzini if (diff == (int32_t)diff) { 1096139c1837SPaolo Bonzini tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); 1097139c1837SPaolo Bonzini tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); 1098139c1837SPaolo Bonzini tcg_out32(s, diff); 1099139c1837SPaolo Bonzini return; 1100139c1837SPaolo Bonzini } 1101139c1837SPaolo Bonzini 1102139c1837SPaolo Bonzini tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); 1103139c1837SPaolo Bonzini tcg_out64(s, arg); 1104139c1837SPaolo Bonzini} 1105139c1837SPaolo Bonzini 11060a6a8bc8SRichard Hendersonstatic void tcg_out_movi(TCGContext *s, TCGType type, 11070a6a8bc8SRichard Henderson TCGReg ret, tcg_target_long arg) 11080a6a8bc8SRichard Henderson{ 11090a6a8bc8SRichard Henderson switch (type) { 11100a6a8bc8SRichard Henderson case TCG_TYPE_I32: 11110a6a8bc8SRichard Henderson#if TCG_TARGET_REG_BITS == 64 11120a6a8bc8SRichard Henderson case TCG_TYPE_I64: 11130a6a8bc8SRichard Henderson#endif 11140a6a8bc8SRichard Henderson if (ret < 16) { 11150a6a8bc8SRichard Henderson tcg_out_movi_int(s, type, ret, arg); 11160a6a8bc8SRichard Henderson } else { 11170a6a8bc8SRichard Henderson tcg_out_movi_vec(s, type, ret, arg); 11180a6a8bc8SRichard Henderson } 11190a6a8bc8SRichard Henderson break; 11200a6a8bc8SRichard Henderson default: 11210a6a8bc8SRichard Henderson g_assert_not_reached(); 11220a6a8bc8SRichard Henderson } 11230a6a8bc8SRichard Henderson} 11240a6a8bc8SRichard Henderson 1125767c2503SRichard Hendersonstatic bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1126767c2503SRichard Henderson{ 1127767c2503SRichard Henderson int rexw = 
type == TCG_TYPE_I32 ? 0 : P_REXW; 1128767c2503SRichard Henderson tcg_out_modrm(s, OPC_XCHG_EvGv + rexw, r1, r2); 1129767c2503SRichard Henderson return true; 1130767c2503SRichard Henderson} 1131767c2503SRichard Henderson 11326a6d772eSRichard Hendersonstatic void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 11336a6d772eSRichard Henderson tcg_target_long imm) 11346a6d772eSRichard Henderson{ 11356a6d772eSRichard Henderson /* This function is only used for passing structs by reference. */ 11367d9e1ee4SRichard Henderson tcg_debug_assert(imm == (int32_t)imm); 113798899850SRichard Henderson tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm); 11386a6d772eSRichard Henderson} 11396a6d772eSRichard Henderson 1140139c1837SPaolo Bonzinistatic inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) 1141139c1837SPaolo Bonzini{ 1142139c1837SPaolo Bonzini if (val == (int8_t)val) { 1143139c1837SPaolo Bonzini tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); 1144139c1837SPaolo Bonzini tcg_out8(s, val); 1145139c1837SPaolo Bonzini } else if (val == (int32_t)val) { 1146139c1837SPaolo Bonzini tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); 1147139c1837SPaolo Bonzini tcg_out32(s, val); 1148139c1837SPaolo Bonzini } else { 1149732e89f4SRichard Henderson g_assert_not_reached(); 1150139c1837SPaolo Bonzini } 1151139c1837SPaolo Bonzini} 1152139c1837SPaolo Bonzini 1153139c1837SPaolo Bonzinistatic inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1154139c1837SPaolo Bonzini{ 1155139c1837SPaolo Bonzini /* Given the strength of x86 memory ordering, we only need care for 1156139c1837SPaolo Bonzini store-load ordering. Experimentally, "lock orl $0,0(%esp)" is 1157139c1837SPaolo Bonzini faster than "mfence", so don't bother with the sse insn. 
*/ 1158139c1837SPaolo Bonzini if (a0 & TCG_MO_ST_LD) { 1159139c1837SPaolo Bonzini tcg_out8(s, 0xf0); 1160139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); 1161139c1837SPaolo Bonzini tcg_out8(s, 0); 1162139c1837SPaolo Bonzini } 1163139c1837SPaolo Bonzini} 1164139c1837SPaolo Bonzini 1165139c1837SPaolo Bonzinistatic inline void tcg_out_push(TCGContext *s, int reg) 1166139c1837SPaolo Bonzini{ 1167139c1837SPaolo Bonzini tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); 1168139c1837SPaolo Bonzini} 1169139c1837SPaolo Bonzini 1170139c1837SPaolo Bonzinistatic inline void tcg_out_pop(TCGContext *s, int reg) 1171139c1837SPaolo Bonzini{ 1172139c1837SPaolo Bonzini tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); 1173139c1837SPaolo Bonzini} 1174139c1837SPaolo Bonzini 1175139c1837SPaolo Bonzinistatic void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1176139c1837SPaolo Bonzini TCGReg arg1, intptr_t arg2) 1177139c1837SPaolo Bonzini{ 1178139c1837SPaolo Bonzini switch (type) { 1179139c1837SPaolo Bonzini case TCG_TYPE_I32: 1180139c1837SPaolo Bonzini if (ret < 16) { 1181139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2); 1182139c1837SPaolo Bonzini } else { 1183139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2); 1184139c1837SPaolo Bonzini } 1185139c1837SPaolo Bonzini break; 1186139c1837SPaolo Bonzini case TCG_TYPE_I64: 1187139c1837SPaolo Bonzini if (ret < 16) { 1188139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2); 1189139c1837SPaolo Bonzini break; 1190139c1837SPaolo Bonzini } 1191139c1837SPaolo Bonzini /* FALLTHRU */ 1192139c1837SPaolo Bonzini case TCG_TYPE_V64: 1193139c1837SPaolo Bonzini /* There is no instruction that can validate 8-byte alignment. 
*/ 1194139c1837SPaolo Bonzini tcg_debug_assert(ret >= 16); 1195139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); 1196139c1837SPaolo Bonzini break; 1197139c1837SPaolo Bonzini case TCG_TYPE_V128: 1198139c1837SPaolo Bonzini /* 1199139c1837SPaolo Bonzini * The gvec infrastructure is asserts that v128 vector loads 1200139c1837SPaolo Bonzini * and stores use a 16-byte aligned offset. Validate that the 1201139c1837SPaolo Bonzini * final pointer is aligned by using an insn that will SIGSEGV. 1202139c1837SPaolo Bonzini */ 1203139c1837SPaolo Bonzini tcg_debug_assert(ret >= 16); 1204139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); 1205139c1837SPaolo Bonzini break; 1206139c1837SPaolo Bonzini case TCG_TYPE_V256: 1207139c1837SPaolo Bonzini /* 1208139c1837SPaolo Bonzini * The gvec infrastructure only requires 16-byte alignment, 1209139c1837SPaolo Bonzini * so here we must use an unaligned load. 1210139c1837SPaolo Bonzini */ 1211139c1837SPaolo Bonzini tcg_debug_assert(ret >= 16); 1212139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, 1213139c1837SPaolo Bonzini ret, 0, arg1, arg2); 1214139c1837SPaolo Bonzini break; 1215139c1837SPaolo Bonzini default: 1216139c1837SPaolo Bonzini g_assert_not_reached(); 1217139c1837SPaolo Bonzini } 1218139c1837SPaolo Bonzini} 1219139c1837SPaolo Bonzini 1220139c1837SPaolo Bonzinistatic void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 1221139c1837SPaolo Bonzini TCGReg arg1, intptr_t arg2) 1222139c1837SPaolo Bonzini{ 1223139c1837SPaolo Bonzini switch (type) { 1224139c1837SPaolo Bonzini case TCG_TYPE_I32: 1225139c1837SPaolo Bonzini if (arg < 16) { 1226139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2); 1227139c1837SPaolo Bonzini } else { 1228139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2); 1229139c1837SPaolo Bonzini } 1230139c1837SPaolo Bonzini break; 1231139c1837SPaolo Bonzini 
case TCG_TYPE_I64: 1232139c1837SPaolo Bonzini if (arg < 16) { 1233139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2); 1234139c1837SPaolo Bonzini break; 1235139c1837SPaolo Bonzini } 1236139c1837SPaolo Bonzini /* FALLTHRU */ 1237139c1837SPaolo Bonzini case TCG_TYPE_V64: 1238139c1837SPaolo Bonzini /* There is no instruction that can validate 8-byte alignment. */ 1239139c1837SPaolo Bonzini tcg_debug_assert(arg >= 16); 1240139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); 1241139c1837SPaolo Bonzini break; 1242139c1837SPaolo Bonzini case TCG_TYPE_V128: 1243139c1837SPaolo Bonzini /* 1244139c1837SPaolo Bonzini * The gvec infrastructure is asserts that v128 vector loads 1245139c1837SPaolo Bonzini * and stores use a 16-byte aligned offset. Validate that the 1246139c1837SPaolo Bonzini * final pointer is aligned by using an insn that will SIGSEGV. 1247c4f4a00aSRichard Henderson * 1248c4f4a00aSRichard Henderson * This specific instance is also used by TCG_CALL_RET_BY_VEC, 1249c4f4a00aSRichard Henderson * for _WIN64, which must have SSE2 but may not have AVX. 1250139c1837SPaolo Bonzini */ 1251139c1837SPaolo Bonzini tcg_debug_assert(arg >= 16); 1252c4f4a00aSRichard Henderson if (have_avx1) { 1253139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); 1254c4f4a00aSRichard Henderson } else { 1255c4f4a00aSRichard Henderson tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2); 1256c4f4a00aSRichard Henderson } 1257139c1837SPaolo Bonzini break; 1258139c1837SPaolo Bonzini case TCG_TYPE_V256: 1259139c1837SPaolo Bonzini /* 1260139c1837SPaolo Bonzini * The gvec infrastructure only requires 16-byte alignment, 1261139c1837SPaolo Bonzini * so here we must use an unaligned store. 
1262139c1837SPaolo Bonzini */ 1263139c1837SPaolo Bonzini tcg_debug_assert(arg >= 16); 1264139c1837SPaolo Bonzini tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, 1265139c1837SPaolo Bonzini arg, 0, arg1, arg2); 1266139c1837SPaolo Bonzini break; 1267139c1837SPaolo Bonzini default: 1268139c1837SPaolo Bonzini g_assert_not_reached(); 1269139c1837SPaolo Bonzini } 1270139c1837SPaolo Bonzini} 1271139c1837SPaolo Bonzini 1272139c1837SPaolo Bonzinistatic bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1273139c1837SPaolo Bonzini TCGReg base, intptr_t ofs) 1274139c1837SPaolo Bonzini{ 1275139c1837SPaolo Bonzini int rexw = 0; 1276139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { 1277139c1837SPaolo Bonzini if (val != (int32_t)val) { 1278139c1837SPaolo Bonzini return false; 1279139c1837SPaolo Bonzini } 1280139c1837SPaolo Bonzini rexw = P_REXW; 1281139c1837SPaolo Bonzini } else if (type != TCG_TYPE_I32) { 1282139c1837SPaolo Bonzini return false; 1283139c1837SPaolo Bonzini } 1284139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs); 1285139c1837SPaolo Bonzini tcg_out32(s, val); 1286139c1837SPaolo Bonzini return true; 1287139c1837SPaolo Bonzini} 1288139c1837SPaolo Bonzini 1289139c1837SPaolo Bonzinistatic void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) 1290139c1837SPaolo Bonzini{ 1291139c1837SPaolo Bonzini /* Propagate an opcode prefix, such as P_DATA16. 
*/ 1292139c1837SPaolo Bonzini int ext = subopc & ~0x7; 1293139c1837SPaolo Bonzini subopc &= 0x7; 1294139c1837SPaolo Bonzini 1295139c1837SPaolo Bonzini if (count == 1) { 1296139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); 1297139c1837SPaolo Bonzini } else { 1298139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); 1299139c1837SPaolo Bonzini tcg_out8(s, count); 1300139c1837SPaolo Bonzini } 1301139c1837SPaolo Bonzini} 1302139c1837SPaolo Bonzini 1303139c1837SPaolo Bonzinistatic inline void tcg_out_bswap32(TCGContext *s, int reg) 1304139c1837SPaolo Bonzini{ 1305139c1837SPaolo Bonzini tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); 1306139c1837SPaolo Bonzini} 1307139c1837SPaolo Bonzini 1308139c1837SPaolo Bonzinistatic inline void tcg_out_rolw_8(TCGContext *s, int reg) 1309139c1837SPaolo Bonzini{ 1310139c1837SPaolo Bonzini tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); 1311139c1837SPaolo Bonzini} 1312139c1837SPaolo Bonzini 1313d0e66c89SRichard Hendersonstatic void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src) 1314139c1837SPaolo Bonzini{ 1315139c1837SPaolo Bonzini /* movzbl */ 1316139c1837SPaolo Bonzini tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); 1317139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); 1318139c1837SPaolo Bonzini} 1319139c1837SPaolo Bonzini 1320678155b2SRichard Hendersonstatic void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) 1321139c1837SPaolo Bonzini{ 1322678155b2SRichard Henderson int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; 1323139c1837SPaolo Bonzini /* movsbl */ 1324139c1837SPaolo Bonzini tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); 1325139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); 1326139c1837SPaolo Bonzini} 1327139c1837SPaolo Bonzini 1328379afdffSRichard Hendersonstatic void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src) 1329139c1837SPaolo Bonzini{ 1330139c1837SPaolo Bonzini /* movzwl */ 1331139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVZWL, dest, src); 1332139c1837SPaolo Bonzini} 1333139c1837SPaolo Bonzini 1334753e42eaSRichard Hendersonstatic void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) 1335139c1837SPaolo Bonzini{ 1336753e42eaSRichard Henderson int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; 1337139c1837SPaolo Bonzini /* movsw[lq] */ 1338139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); 1339139c1837SPaolo Bonzini} 1340139c1837SPaolo Bonzini 13419ecf5f61SRichard Hendersonstatic void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src) 1342139c1837SPaolo Bonzini{ 1343139c1837SPaolo Bonzini /* 32-bit mov zero extends. 
*/ 1344139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); 1345139c1837SPaolo Bonzini} 1346139c1837SPaolo Bonzini 134752bf3398SRichard Hendersonstatic void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src) 1348139c1837SPaolo Bonzini{ 134952bf3398SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1350139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVSLQ, dest, src); 1351139c1837SPaolo Bonzini} 1352139c1837SPaolo Bonzini 13539c6aa274SRichard Hendersonstatic void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src) 13549c6aa274SRichard Henderson{ 13559c6aa274SRichard Henderson tcg_out_ext32s(s, dest, src); 13569c6aa274SRichard Henderson} 13579c6aa274SRichard Henderson 1358b9bfe000SRichard Hendersonstatic void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src) 1359b9bfe000SRichard Henderson{ 1360b2485530SRichard Henderson if (dest != src) { 1361b9bfe000SRichard Henderson tcg_out_ext32u(s, dest, src); 1362b9bfe000SRichard Henderson } 1363b2485530SRichard Henderson} 1364b9bfe000SRichard Henderson 1365b8b94ac6SRichard Hendersonstatic void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src) 1366b8b94ac6SRichard Henderson{ 1367b8b94ac6SRichard Henderson tcg_out_ext32u(s, dest, src); 1368b8b94ac6SRichard Henderson} 1369b8b94ac6SRichard Henderson 1370139c1837SPaolo Bonzinistatic inline void tcg_out_bswap64(TCGContext *s, int reg) 1371139c1837SPaolo Bonzini{ 1372139c1837SPaolo Bonzini tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); 1373139c1837SPaolo Bonzini} 1374139c1837SPaolo Bonzini 1375139c1837SPaolo Bonzinistatic void tgen_arithi(TCGContext *s, int c, int r0, 1376139c1837SPaolo Bonzini tcg_target_long val, int cf) 1377139c1837SPaolo Bonzini{ 1378139c1837SPaolo Bonzini int rexw = 0; 1379139c1837SPaolo Bonzini 1380139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 1381139c1837SPaolo Bonzini rexw = c & -8; 1382139c1837SPaolo Bonzini c &= 7; 1383139c1837SPaolo Bonzini } 1384139c1837SPaolo Bonzini 
138564708db3SPaolo Bonzini switch (c) { 138664708db3SPaolo Bonzini case ARITH_ADD: 138764708db3SPaolo Bonzini case ARITH_SUB: 138864708db3SPaolo Bonzini if (!cf) { 138964708db3SPaolo Bonzini /* 139064708db3SPaolo Bonzini * ??? While INC is 2 bytes shorter than ADDL $1, they also induce 139164708db3SPaolo Bonzini * partial flags update stalls on Pentium4 and are not recommended 139264708db3SPaolo Bonzini * by current Intel optimization manuals. 139364708db3SPaolo Bonzini */ 139464708db3SPaolo Bonzini if (val == 1 || val == -1) { 1395139c1837SPaolo Bonzini int is_inc = (c == ARITH_ADD) ^ (val < 0); 1396139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 139764708db3SPaolo Bonzini /* 139864708db3SPaolo Bonzini * The single-byte increment encodings are re-tasked 139964708db3SPaolo Bonzini * as the REX prefixes. Use the MODRM encoding. 140064708db3SPaolo Bonzini */ 1401139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP5 + rexw, 1402139c1837SPaolo Bonzini (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); 1403139c1837SPaolo Bonzini } else { 1404139c1837SPaolo Bonzini tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); 1405139c1837SPaolo Bonzini } 1406139c1837SPaolo Bonzini return; 1407139c1837SPaolo Bonzini } 140864708db3SPaolo Bonzini if (val == 128) { 140964708db3SPaolo Bonzini /* 141064708db3SPaolo Bonzini * Facilitate using an 8-bit immediate. Carry is inverted 141164708db3SPaolo Bonzini * by this transformation, so do it only if cf == 0. 
141264708db3SPaolo Bonzini */ 141364708db3SPaolo Bonzini c ^= ARITH_ADD ^ ARITH_SUB; 141464708db3SPaolo Bonzini val = -128; 141564708db3SPaolo Bonzini } 141664708db3SPaolo Bonzini } 141764708db3SPaolo Bonzini break; 1418139c1837SPaolo Bonzini 141964708db3SPaolo Bonzini case ARITH_AND: 1420139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 1421139c1837SPaolo Bonzini if (val == 0xffffffffu) { 1422139c1837SPaolo Bonzini tcg_out_ext32u(s, r0, r0); 1423139c1837SPaolo Bonzini return; 1424139c1837SPaolo Bonzini } 1425139c1837SPaolo Bonzini if (val == (uint32_t)val) { 1426139c1837SPaolo Bonzini /* AND with no high bits set can use a 32-bit operation. */ 1427139c1837SPaolo Bonzini rexw = 0; 1428139c1837SPaolo Bonzini } 1429139c1837SPaolo Bonzini } 1430139c1837SPaolo Bonzini if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { 1431139c1837SPaolo Bonzini tcg_out_ext8u(s, r0, r0); 1432139c1837SPaolo Bonzini return; 1433139c1837SPaolo Bonzini } 1434139c1837SPaolo Bonzini if (val == 0xffffu) { 1435139c1837SPaolo Bonzini tcg_out_ext16u(s, r0, r0); 1436139c1837SPaolo Bonzini return; 1437139c1837SPaolo Bonzini } 143864708db3SPaolo Bonzini break; 1439afa37be4SPaolo Bonzini 1440afa37be4SPaolo Bonzini case ARITH_OR: 1441afa37be4SPaolo Bonzini case ARITH_XOR: 1442afa37be4SPaolo Bonzini if (val >= 0x80 && val <= 0xff 1443afa37be4SPaolo Bonzini && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { 1444afa37be4SPaolo Bonzini tcg_out_modrm(s, OPC_ARITH_EbIb + P_REXB_RM, c, r0); 1445afa37be4SPaolo Bonzini tcg_out8(s, val); 1446afa37be4SPaolo Bonzini return; 1447afa37be4SPaolo Bonzini } 1448afa37be4SPaolo Bonzini break; 1449139c1837SPaolo Bonzini } 1450139c1837SPaolo Bonzini 1451139c1837SPaolo Bonzini if (val == (int8_t)val) { 1452139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); 1453139c1837SPaolo Bonzini tcg_out8(s, val); 1454139c1837SPaolo Bonzini return; 1455139c1837SPaolo Bonzini } 1456139c1837SPaolo Bonzini if (rexw == 0 || val == (int32_t)val) { 
1457139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); 1458139c1837SPaolo Bonzini tcg_out32(s, val); 1459139c1837SPaolo Bonzini return; 1460139c1837SPaolo Bonzini } 1461139c1837SPaolo Bonzini 1462732e89f4SRichard Henderson g_assert_not_reached(); 1463139c1837SPaolo Bonzini} 1464139c1837SPaolo Bonzini 1465139c1837SPaolo Bonzinistatic void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) 1466139c1837SPaolo Bonzini{ 1467139c1837SPaolo Bonzini if (val != 0) { 1468139c1837SPaolo Bonzini tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); 1469139c1837SPaolo Bonzini } 1470139c1837SPaolo Bonzini} 1471139c1837SPaolo Bonzini 14721a057554SRichard Henderson/* Set SMALL to force a short forward branch. */ 14731a057554SRichard Hendersonstatic void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) 1474139c1837SPaolo Bonzini{ 1475139c1837SPaolo Bonzini int32_t val, val1; 1476139c1837SPaolo Bonzini 1477139c1837SPaolo Bonzini if (l->has_value) { 1478139c1837SPaolo Bonzini val = tcg_pcrel_diff(s, l->u.value_ptr); 1479139c1837SPaolo Bonzini val1 = val - 2; 1480139c1837SPaolo Bonzini if ((int8_t)val1 == val1) { 1481139c1837SPaolo Bonzini if (opc == -1) { 1482139c1837SPaolo Bonzini tcg_out8(s, OPC_JMP_short); 1483139c1837SPaolo Bonzini } else { 1484139c1837SPaolo Bonzini tcg_out8(s, OPC_JCC_short + opc); 1485139c1837SPaolo Bonzini } 1486139c1837SPaolo Bonzini tcg_out8(s, val1); 1487139c1837SPaolo Bonzini } else { 14881a057554SRichard Henderson tcg_debug_assert(!small); 1489139c1837SPaolo Bonzini if (opc == -1) { 1490139c1837SPaolo Bonzini tcg_out8(s, OPC_JMP_long); 1491139c1837SPaolo Bonzini tcg_out32(s, val - 5); 1492139c1837SPaolo Bonzini } else { 1493139c1837SPaolo Bonzini tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); 1494139c1837SPaolo Bonzini tcg_out32(s, val - 6); 1495139c1837SPaolo Bonzini } 1496139c1837SPaolo Bonzini } 1497139c1837SPaolo Bonzini } else if (small) { 1498139c1837SPaolo Bonzini if (opc == -1) { 1499139c1837SPaolo Bonzini 
tcg_out8(s, OPC_JMP_short); 1500139c1837SPaolo Bonzini } else { 1501139c1837SPaolo Bonzini tcg_out8(s, OPC_JCC_short + opc); 1502139c1837SPaolo Bonzini } 1503139c1837SPaolo Bonzini tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); 1504139c1837SPaolo Bonzini s->code_ptr += 1; 1505139c1837SPaolo Bonzini } else { 1506139c1837SPaolo Bonzini if (opc == -1) { 1507139c1837SPaolo Bonzini tcg_out8(s, OPC_JMP_long); 1508139c1837SPaolo Bonzini } else { 1509139c1837SPaolo Bonzini tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); 1510139c1837SPaolo Bonzini } 1511139c1837SPaolo Bonzini tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); 1512139c1837SPaolo Bonzini s->code_ptr += 4; 1513139c1837SPaolo Bonzini } 1514139c1837SPaolo Bonzini} 1515139c1837SPaolo Bonzini 15166749d85bSRichard Hendersonstatic int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1, 15176749d85bSRichard Henderson TCGArg arg2, int const_arg2, int rexw) 1518139c1837SPaolo Bonzini{ 1519d3d1c30cSRichard Henderson int jz, js; 1520303214aaSRichard Henderson 1521303214aaSRichard Henderson if (!is_tst_cond(cond)) { 1522303214aaSRichard Henderson if (!const_arg2) { 1523303214aaSRichard Henderson tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); 1524303214aaSRichard Henderson } else if (arg2 == 0) { 1525139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); 1526139c1837SPaolo Bonzini } else { 1527303214aaSRichard Henderson tcg_debug_assert(!rexw || arg2 == (int32_t)arg2); 1528139c1837SPaolo Bonzini tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); 1529139c1837SPaolo Bonzini } 15306749d85bSRichard Henderson return tcg_cond_to_jcc[cond]; 1531139c1837SPaolo Bonzini } 1532139c1837SPaolo Bonzini 1533303214aaSRichard Henderson jz = tcg_cond_to_jcc[cond]; 1534d3d1c30cSRichard Henderson js = (cond == TCG_COND_TSTNE ? 
JCC_JS : JCC_JNS); 1535303214aaSRichard Henderson 1536303214aaSRichard Henderson if (!const_arg2) { 1537303214aaSRichard Henderson tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2); 1538303214aaSRichard Henderson return jz; 1539303214aaSRichard Henderson } 1540303214aaSRichard Henderson 1541303214aaSRichard Henderson if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) { 1542d3d1c30cSRichard Henderson if (arg2 == 0x80) { 1543d3d1c30cSRichard Henderson tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); 1544d3d1c30cSRichard Henderson return js; 1545d3d1c30cSRichard Henderson } 1546be1335dbSPaolo Bonzini if (arg2 == 0xff) { 1547be1335dbSPaolo Bonzini tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); 1548be1335dbSPaolo Bonzini return jz; 1549be1335dbSPaolo Bonzini } 1550303214aaSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1); 1551303214aaSRichard Henderson tcg_out8(s, arg2); 1552303214aaSRichard Henderson return jz; 1553303214aaSRichard Henderson } 1554303214aaSRichard Henderson 1555303214aaSRichard Henderson if ((arg2 & ~0xff00) == 0 && arg1 < 4) { 1556d3d1c30cSRichard Henderson if (arg2 == 0x8000) { 1557d3d1c30cSRichard Henderson tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); 1558d3d1c30cSRichard Henderson return js; 1559d3d1c30cSRichard Henderson } 1560be1335dbSPaolo Bonzini if (arg2 == 0xff00) { 1561be1335dbSPaolo Bonzini tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); 1562be1335dbSPaolo Bonzini return jz; 1563be1335dbSPaolo Bonzini } 1564303214aaSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4); 1565303214aaSRichard Henderson tcg_out8(s, arg2 >> 8); 1566303214aaSRichard Henderson return jz; 1567303214aaSRichard Henderson } 1568303214aaSRichard Henderson 1569be1335dbSPaolo Bonzini if (arg2 == 0xffff) { 1570be1335dbSPaolo Bonzini tcg_out_modrm(s, OPC_TESTL | P_DATA16, arg1, arg1); 1571be1335dbSPaolo Bonzini return jz; 1572be1335dbSPaolo Bonzini } 1573be1335dbSPaolo Bonzini if (arg2 == 0xffffffffu) { 
1574be1335dbSPaolo Bonzini tcg_out_modrm(s, OPC_TESTL, arg1, arg1); 1575be1335dbSPaolo Bonzini return jz; 1576be1335dbSPaolo Bonzini } 1577be1335dbSPaolo Bonzini 1578d3d1c30cSRichard Henderson if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) { 1579d3d1c30cSRichard Henderson int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE); 1580d3d1c30cSRichard Henderson int sh = ctz64(arg2); 1581d3d1c30cSRichard Henderson 1582d3d1c30cSRichard Henderson rexw = (sh & 32 ? P_REXW : 0); 1583d3d1c30cSRichard Henderson if ((sh & 31) == 31) { 1584d3d1c30cSRichard Henderson tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1); 1585d3d1c30cSRichard Henderson return js; 1586d3d1c30cSRichard Henderson } else { 1587d3d1c30cSRichard Henderson tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1); 1588d3d1c30cSRichard Henderson tcg_out8(s, sh); 1589d3d1c30cSRichard Henderson return jc; 1590d3d1c30cSRichard Henderson } 1591d3d1c30cSRichard Henderson } 1592d3d1c30cSRichard Henderson 1593303214aaSRichard Henderson if (rexw) { 1594303214aaSRichard Henderson if (arg2 == (uint32_t)arg2) { 1595303214aaSRichard Henderson rexw = 0; 1596303214aaSRichard Henderson } else { 1597303214aaSRichard Henderson tcg_debug_assert(arg2 == (int32_t)arg2); 1598303214aaSRichard Henderson } 1599303214aaSRichard Henderson } 1600303214aaSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, arg1); 1601303214aaSRichard Henderson tcg_out32(s, arg2); 1602303214aaSRichard Henderson return jz; 1603303214aaSRichard Henderson} 1604303214aaSRichard Henderson 1605c359ce75SRichard Hendersonstatic void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, 1606139c1837SPaolo Bonzini TCGArg arg1, TCGArg arg2, int const_arg2, 1607c359ce75SRichard Henderson TCGLabel *label, bool small) 1608139c1837SPaolo Bonzini{ 16096749d85bSRichard Henderson int jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw); 16106749d85bSRichard Henderson tcg_out_jxx(s, jcc, label, small); 1611139c1837SPaolo Bonzini} 1612139c1837SPaolo Bonzini 
1613c359ce75SRichard Henderson#if TCG_TARGET_REG_BITS == 32 1614139c1837SPaolo Bonzinistatic void tcg_out_brcond2(TCGContext *s, const TCGArg *args, 1615c359ce75SRichard Henderson const int *const_args, bool small) 1616139c1837SPaolo Bonzini{ 1617139c1837SPaolo Bonzini TCGLabel *label_next = gen_new_label(); 1618139c1837SPaolo Bonzini TCGLabel *label_this = arg_label(args[5]); 1619303214aaSRichard Henderson TCGCond cond = args[4]; 1620139c1837SPaolo Bonzini 1621303214aaSRichard Henderson switch (cond) { 1622139c1837SPaolo Bonzini case TCG_COND_EQ: 1623303214aaSRichard Henderson case TCG_COND_TSTEQ: 1624303214aaSRichard Henderson tcg_out_brcond(s, 0, tcg_invert_cond(cond), 1625303214aaSRichard Henderson args[0], args[2], const_args[2], label_next, 1); 1626303214aaSRichard Henderson tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], 1627139c1837SPaolo Bonzini label_this, small); 1628139c1837SPaolo Bonzini break; 1629139c1837SPaolo Bonzini case TCG_COND_NE: 1630303214aaSRichard Henderson case TCG_COND_TSTNE: 1631303214aaSRichard Henderson tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2], 1632139c1837SPaolo Bonzini label_this, small); 1633303214aaSRichard Henderson tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], 1634139c1837SPaolo Bonzini label_this, small); 1635139c1837SPaolo Bonzini break; 1636139c1837SPaolo Bonzini case TCG_COND_LT: 1637c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3], 1638139c1837SPaolo Bonzini label_this, small); 1639139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1640c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2], 1641139c1837SPaolo Bonzini label_this, small); 1642139c1837SPaolo Bonzini break; 1643139c1837SPaolo Bonzini case TCG_COND_LE: 1644c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3], 1645139c1837SPaolo Bonzini label_this, small); 1646139c1837SPaolo Bonzini 
tcg_out_jxx(s, JCC_JNE, label_next, 1); 1647c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2], 1648139c1837SPaolo Bonzini label_this, small); 1649139c1837SPaolo Bonzini break; 1650139c1837SPaolo Bonzini case TCG_COND_GT: 1651c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3], 1652139c1837SPaolo Bonzini label_this, small); 1653139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1654c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2], 1655139c1837SPaolo Bonzini label_this, small); 1656139c1837SPaolo Bonzini break; 1657139c1837SPaolo Bonzini case TCG_COND_GE: 1658c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3], 1659139c1837SPaolo Bonzini label_this, small); 1660139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1661c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2], 1662139c1837SPaolo Bonzini label_this, small); 1663139c1837SPaolo Bonzini break; 1664139c1837SPaolo Bonzini case TCG_COND_LTU: 1665c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3], 1666139c1837SPaolo Bonzini label_this, small); 1667139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1668c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2], 1669139c1837SPaolo Bonzini label_this, small); 1670139c1837SPaolo Bonzini break; 1671139c1837SPaolo Bonzini case TCG_COND_LEU: 1672c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3], 1673139c1837SPaolo Bonzini label_this, small); 1674139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1675c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2], 1676139c1837SPaolo Bonzini label_this, small); 1677139c1837SPaolo Bonzini break; 1678139c1837SPaolo Bonzini 
case TCG_COND_GTU: 1679c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3], 1680139c1837SPaolo Bonzini label_this, small); 1681139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1682c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2], 1683139c1837SPaolo Bonzini label_this, small); 1684139c1837SPaolo Bonzini break; 1685139c1837SPaolo Bonzini case TCG_COND_GEU: 1686c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3], 1687139c1837SPaolo Bonzini label_this, small); 1688139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JNE, label_next, 1); 1689c359ce75SRichard Henderson tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2], 1690139c1837SPaolo Bonzini label_this, small); 1691139c1837SPaolo Bonzini break; 1692139c1837SPaolo Bonzini default: 1693732e89f4SRichard Henderson g_assert_not_reached(); 1694139c1837SPaolo Bonzini } 169592ab8e7dSRichard Henderson tcg_out_label(s, label_next); 1696139c1837SPaolo Bonzini} 1697139c1837SPaolo Bonzini#endif 1698139c1837SPaolo Bonzini 16997ba99a1cSRichard Hendersonstatic void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, 17007ba99a1cSRichard Henderson TCGArg dest, TCGArg arg1, TCGArg arg2, 170195bf306eSRichard Henderson int const_arg2, bool neg) 1702139c1837SPaolo Bonzini{ 170319517b83SRichard Henderson int cmp_rexw = rexw; 17046950f68bSRichard Henderson bool inv = false; 170596658acaSRichard Henderson bool cleared; 17066749d85bSRichard Henderson int jcc; 17076950f68bSRichard Henderson 17086950f68bSRichard Henderson switch (cond) { 17096950f68bSRichard Henderson case TCG_COND_NE: 17106950f68bSRichard Henderson inv = true; 17116950f68bSRichard Henderson /* fall through */ 17126950f68bSRichard Henderson case TCG_COND_EQ: 17136950f68bSRichard Henderson /* If arg2 is 0, convert to LTU/GEU vs 1. 
*/ 17146950f68bSRichard Henderson if (const_arg2 && arg2 == 0) { 17156950f68bSRichard Henderson arg2 = 1; 17166950f68bSRichard Henderson goto do_ltu; 17176950f68bSRichard Henderson } 17186950f68bSRichard Henderson break; 17196950f68bSRichard Henderson 172019517b83SRichard Henderson case TCG_COND_TSTNE: 172119517b83SRichard Henderson inv = true; 172219517b83SRichard Henderson /* fall through */ 172319517b83SRichard Henderson case TCG_COND_TSTEQ: 172419517b83SRichard Henderson /* If arg2 is -1, convert to LTU/GEU vs 1. */ 172519517b83SRichard Henderson if (const_arg2 && arg2 == 0xffffffffu) { 172619517b83SRichard Henderson arg2 = 1; 172719517b83SRichard Henderson cmp_rexw = 0; 172819517b83SRichard Henderson goto do_ltu; 172919517b83SRichard Henderson } 173019517b83SRichard Henderson break; 173119517b83SRichard Henderson 17326950f68bSRichard Henderson case TCG_COND_LEU: 17336950f68bSRichard Henderson inv = true; 17346950f68bSRichard Henderson /* fall through */ 17356950f68bSRichard Henderson case TCG_COND_GTU: 17366950f68bSRichard Henderson /* If arg2 is a register, swap for LTU/GEU. */ 17376950f68bSRichard Henderson if (!const_arg2) { 17386950f68bSRichard Henderson TCGReg t = arg1; 17396950f68bSRichard Henderson arg1 = arg2; 17406950f68bSRichard Henderson arg2 = t; 17416950f68bSRichard Henderson goto do_ltu; 17426950f68bSRichard Henderson } 17436950f68bSRichard Henderson break; 17446950f68bSRichard Henderson 17456950f68bSRichard Henderson case TCG_COND_GEU: 17466950f68bSRichard Henderson inv = true; 17476950f68bSRichard Henderson /* fall through */ 17486950f68bSRichard Henderson case TCG_COND_LTU: 17496950f68bSRichard Henderson do_ltu: 17506950f68bSRichard Henderson /* 17516950f68bSRichard Henderson * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU. 17526950f68bSRichard Henderson * We can then use NEG or INC to produce the desired result. 17536950f68bSRichard Henderson * This is always smaller than the SETCC expansion. 
17546950f68bSRichard Henderson */ 175519517b83SRichard Henderson tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw); 175695bf306eSRichard Henderson 175795bf306eSRichard Henderson /* X - X - C = -C = (C ? -1 : 0) */ 175895bf306eSRichard Henderson tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest); 175995bf306eSRichard Henderson if (inv && neg) { 176095bf306eSRichard Henderson /* ~(C ? -1 : 0) = (C ? 0 : -1) */ 176195bf306eSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); 176295bf306eSRichard Henderson } else if (inv) { 176395bf306eSRichard Henderson /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */ 176495bf306eSRichard Henderson tgen_arithi(s, ARITH_ADD, dest, 1, 0); 176595bf306eSRichard Henderson } else if (!neg) { 176695bf306eSRichard Henderson /* -(C ? -1 : 0) = (C ? 1 : 0) */ 176795bf306eSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, dest); 17686950f68bSRichard Henderson } 17696950f68bSRichard Henderson return; 17706950f68bSRichard Henderson 1771e91f015bSRichard Henderson case TCG_COND_GE: 1772e91f015bSRichard Henderson inv = true; 1773e91f015bSRichard Henderson /* fall through */ 1774e91f015bSRichard Henderson case TCG_COND_LT: 1775e91f015bSRichard Henderson /* If arg2 is 0, extract the sign bit. */ 1776e91f015bSRichard Henderson if (const_arg2 && arg2 == 0) { 1777e91f015bSRichard Henderson tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, dest, arg1); 1778e91f015bSRichard Henderson if (inv) { 1779e91f015bSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); 1780e91f015bSRichard Henderson } 178195bf306eSRichard Henderson tcg_out_shifti(s, (neg ? SHIFT_SAR : SHIFT_SHR) + rexw, 178295bf306eSRichard Henderson dest, rexw ? 
63 : 31); 1783e91f015bSRichard Henderson return; 1784e91f015bSRichard Henderson } 1785e91f015bSRichard Henderson break; 1786e91f015bSRichard Henderson 17876950f68bSRichard Henderson default: 17886950f68bSRichard Henderson break; 17896950f68bSRichard Henderson } 17906950f68bSRichard Henderson 179196658acaSRichard Henderson /* 179296658acaSRichard Henderson * If dest does not overlap the inputs, clearing it first is preferred. 179396658acaSRichard Henderson * The XOR breaks any false dependency for the low-byte write to dest, 179496658acaSRichard Henderson * and is also one byte smaller than MOVZBL. 179596658acaSRichard Henderson */ 179696658acaSRichard Henderson cleared = false; 179796658acaSRichard Henderson if (dest != arg1 && (const_arg2 || dest != arg2)) { 179896658acaSRichard Henderson tgen_arithr(s, ARITH_XOR, dest, dest); 179996658acaSRichard Henderson cleared = true; 180096658acaSRichard Henderson } 180196658acaSRichard Henderson 180219517b83SRichard Henderson jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw); 18036749d85bSRichard Henderson tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest); 180496658acaSRichard Henderson 180596658acaSRichard Henderson if (!cleared) { 1806139c1837SPaolo Bonzini tcg_out_ext8u(s, dest, dest); 1807139c1837SPaolo Bonzini } 180895bf306eSRichard Henderson if (neg) { 180995bf306eSRichard Henderson tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, dest); 181095bf306eSRichard Henderson } 181196658acaSRichard Henderson} 1812139c1837SPaolo Bonzini 18137ba99a1cSRichard Henderson#if TCG_TARGET_REG_BITS == 32 1814139c1837SPaolo Bonzinistatic void tcg_out_setcond2(TCGContext *s, const TCGArg *args, 1815139c1837SPaolo Bonzini const int *const_args) 1816139c1837SPaolo Bonzini{ 1817139c1837SPaolo Bonzini TCGArg new_args[6]; 1818139c1837SPaolo Bonzini TCGLabel *label_true, *label_over; 1819139c1837SPaolo Bonzini 1820139c1837SPaolo Bonzini memcpy(new_args, args+1, 5*sizeof(TCGArg)); 1821139c1837SPaolo Bonzini 1822139c1837SPaolo Bonzini if 
(args[0] == args[1] || args[0] == args[2] 1823139c1837SPaolo Bonzini || (!const_args[3] && args[0] == args[3]) 1824139c1837SPaolo Bonzini || (!const_args[4] && args[0] == args[4])) { 1825139c1837SPaolo Bonzini /* When the destination overlaps with one of the argument 1826139c1837SPaolo Bonzini registers, don't do anything tricky. */ 1827139c1837SPaolo Bonzini label_true = gen_new_label(); 1828139c1837SPaolo Bonzini label_over = gen_new_label(); 1829139c1837SPaolo Bonzini 1830139c1837SPaolo Bonzini new_args[5] = label_arg(label_true); 1831139c1837SPaolo Bonzini tcg_out_brcond2(s, new_args, const_args+1, 1); 1832139c1837SPaolo Bonzini 1833139c1837SPaolo Bonzini tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); 1834139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JMP, label_over, 1); 183592ab8e7dSRichard Henderson tcg_out_label(s, label_true); 1836139c1837SPaolo Bonzini 1837139c1837SPaolo Bonzini tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); 183892ab8e7dSRichard Henderson tcg_out_label(s, label_over); 1839139c1837SPaolo Bonzini } else { 1840139c1837SPaolo Bonzini /* When the destination does not overlap one of the arguments, 1841139c1837SPaolo Bonzini clear the destination first, jump if cond false, and emit an 1842139c1837SPaolo Bonzini increment in the true case. This results in smaller code. 
*/ 1843139c1837SPaolo Bonzini 1844139c1837SPaolo Bonzini tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); 1845139c1837SPaolo Bonzini 1846139c1837SPaolo Bonzini label_over = gen_new_label(); 1847139c1837SPaolo Bonzini new_args[4] = tcg_invert_cond(new_args[4]); 1848139c1837SPaolo Bonzini new_args[5] = label_arg(label_over); 1849139c1837SPaolo Bonzini tcg_out_brcond2(s, new_args, const_args+1, 1); 1850139c1837SPaolo Bonzini 1851139c1837SPaolo Bonzini tgen_arithi(s, ARITH_ADD, args[0], 1, 0); 185292ab8e7dSRichard Henderson tcg_out_label(s, label_over); 1853139c1837SPaolo Bonzini } 1854139c1837SPaolo Bonzini} 1855139c1837SPaolo Bonzini#endif 1856139c1837SPaolo Bonzini 1857c95da56bSRichard Hendersonstatic void tcg_out_cmov(TCGContext *s, int jcc, int rexw, 1858139c1837SPaolo Bonzini TCGReg dest, TCGReg v1) 1859139c1837SPaolo Bonzini{ 1860c95da56bSRichard Henderson tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); 1861139c1837SPaolo Bonzini} 1862139c1837SPaolo Bonzini 186378ddf0dcSRichard Hendersonstatic void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, 186478ddf0dcSRichard Henderson TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, 1865139c1837SPaolo Bonzini TCGReg v1) 1866139c1837SPaolo Bonzini{ 18676749d85bSRichard Henderson int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw); 18686749d85bSRichard Henderson tcg_out_cmov(s, jcc, rexw, dest, v1); 1869139c1837SPaolo Bonzini} 1870139c1837SPaolo Bonzini 1871139c1837SPaolo Bonzinistatic void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, 1872139c1837SPaolo Bonzini TCGArg arg2, bool const_a2) 1873139c1837SPaolo Bonzini{ 1874139c1837SPaolo Bonzini if (have_bmi1) { 1875139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); 1876139c1837SPaolo Bonzini if (const_a2) { 1877139c1837SPaolo Bonzini tcg_debug_assert(arg2 == (rexw ? 
64 : 32)); 1878139c1837SPaolo Bonzini } else { 1879139c1837SPaolo Bonzini tcg_debug_assert(dest != arg2); 1880c95da56bSRichard Henderson tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); 1881139c1837SPaolo Bonzini } 1882139c1837SPaolo Bonzini } else { 1883139c1837SPaolo Bonzini tcg_debug_assert(dest != arg2); 1884139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); 1885c95da56bSRichard Henderson tcg_out_cmov(s, JCC_JE, rexw, dest, arg2); 1886139c1837SPaolo Bonzini } 1887139c1837SPaolo Bonzini} 1888139c1837SPaolo Bonzini 1889139c1837SPaolo Bonzinistatic void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, 1890139c1837SPaolo Bonzini TCGArg arg2, bool const_a2) 1891139c1837SPaolo Bonzini{ 1892139c1837SPaolo Bonzini if (have_lzcnt) { 1893139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); 1894139c1837SPaolo Bonzini if (const_a2) { 1895139c1837SPaolo Bonzini tcg_debug_assert(arg2 == (rexw ? 64 : 32)); 1896139c1837SPaolo Bonzini } else { 1897139c1837SPaolo Bonzini tcg_debug_assert(dest != arg2); 1898c95da56bSRichard Henderson tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); 1899139c1837SPaolo Bonzini } 1900139c1837SPaolo Bonzini } else { 1901139c1837SPaolo Bonzini tcg_debug_assert(!const_a2); 1902139c1837SPaolo Bonzini tcg_debug_assert(dest != arg1); 1903139c1837SPaolo Bonzini tcg_debug_assert(dest != arg2); 1904139c1837SPaolo Bonzini 1905139c1837SPaolo Bonzini /* Recall that the output of BSR is the index not the count. */ 1906139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); 1907139c1837SPaolo Bonzini tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); 1908139c1837SPaolo Bonzini 1909139c1837SPaolo Bonzini /* Since we have destroyed the flags from BSR, we have to re-test. 
*/ 19106749d85bSRichard Henderson int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw); 19116749d85bSRichard Henderson tcg_out_cmov(s, jcc, rexw, dest, arg2); 1912139c1837SPaolo Bonzini } 1913139c1837SPaolo Bonzini} 1914139c1837SPaolo Bonzini 19152be7d76bSRichard Hendersonstatic void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) 1916139c1837SPaolo Bonzini{ 1917139c1837SPaolo Bonzini intptr_t disp = tcg_pcrel_diff(s, dest) - 5; 1918139c1837SPaolo Bonzini 1919139c1837SPaolo Bonzini if (disp == (int32_t)disp) { 1920139c1837SPaolo Bonzini tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); 1921139c1837SPaolo Bonzini tcg_out32(s, disp); 1922139c1837SPaolo Bonzini } else { 1923139c1837SPaolo Bonzini /* rip-relative addressing into the constant pool. 1924139c1837SPaolo Bonzini This is 6 + 8 = 14 bytes, as compared to using an 19257a21bee2SDaniel P. Berrangé immediate load 10 + 6 = 16 bytes, plus we may 1926139c1837SPaolo Bonzini be able to re-use the pool constant for more calls. */ 1927139c1837SPaolo Bonzini tcg_out_opc(s, OPC_GRP5, 0, 0, 0); 1928139c1837SPaolo Bonzini tcg_out8(s, (call ? 
EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5); 1929139c1837SPaolo Bonzini new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4); 1930139c1837SPaolo Bonzini tcg_out32(s, 0); 1931139c1837SPaolo Bonzini } 1932139c1837SPaolo Bonzini} 1933139c1837SPaolo Bonzini 1934cee44b03SRichard Hendersonstatic void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, 1935cee44b03SRichard Henderson const TCGHelperInfo *info) 1936139c1837SPaolo Bonzini{ 1937139c1837SPaolo Bonzini tcg_out_branch(s, 1, dest); 1938c4f4a00aSRichard Henderson 1939c4f4a00aSRichard Henderson#ifndef _WIN32 1940c4f4a00aSRichard Henderson if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) { 1941c4f4a00aSRichard Henderson /* 1942c4f4a00aSRichard Henderson * The sysv i386 abi for struct return places a reference as the 1943c4f4a00aSRichard Henderson * first argument of the stack, and pops that argument with the 1944c4f4a00aSRichard Henderson * return statement. Since we want to retain the aligned stack 1945c4f4a00aSRichard Henderson * pointer for the callee, we do not want to actually push that 1946c4f4a00aSRichard Henderson * argument before the call but rely on the normal store to the 1947c4f4a00aSRichard Henderson * stack slot. But we do need to compensate for the pop in order 1948c4f4a00aSRichard Henderson * to reset our correct stack pointer value. 1949c4f4a00aSRichard Henderson * Pushing a garbage value back onto the stack is quickest. 
1950c4f4a00aSRichard Henderson */ 1951c4f4a00aSRichard Henderson tcg_out_push(s, TCG_REG_EAX); 1952c4f4a00aSRichard Henderson } 1953c4f4a00aSRichard Henderson#endif 1954139c1837SPaolo Bonzini} 1955139c1837SPaolo Bonzini 1956705ed477SRichard Hendersonstatic void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest) 1957139c1837SPaolo Bonzini{ 1958139c1837SPaolo Bonzini tcg_out_branch(s, 0, dest); 1959139c1837SPaolo Bonzini} 1960139c1837SPaolo Bonzini 1961139c1837SPaolo Bonzinistatic void tcg_out_nopn(TCGContext *s, int n) 1962139c1837SPaolo Bonzini{ 1963139c1837SPaolo Bonzini int i; 1964139c1837SPaolo Bonzini /* Emit 1 or 2 operand size prefixes for the standard one byte nop, 1965139c1837SPaolo Bonzini * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the 1966139c1837SPaolo Bonzini * duplicate prefix, and all of the interesting recent cores can 1967139c1837SPaolo Bonzini * decode and discard the duplicates in a single cycle. 1968139c1837SPaolo Bonzini */ 1969139c1837SPaolo Bonzini tcg_debug_assert(n >= 1); 1970139c1837SPaolo Bonzini for (i = 1; i < n; ++i) { 1971139c1837SPaolo Bonzini tcg_out8(s, 0x66); 1972139c1837SPaolo Bonzini } 1973139c1837SPaolo Bonzini tcg_out8(s, 0x90); 1974139c1837SPaolo Bonzini} 1975139c1837SPaolo Bonzini 197661713c29SRichard Hendersontypedef struct { 197761713c29SRichard Henderson TCGReg base; 197861713c29SRichard Henderson int index; 197961713c29SRichard Henderson int ofs; 198061713c29SRichard Henderson int seg; 19811c5322d9SRichard Henderson TCGAtomAlign aa; 198261713c29SRichard Henderson} HostAddress; 198361713c29SRichard Henderson 19847b880107SRichard Hendersonbool tcg_target_has_memory_bswap(MemOp memop) 19857b880107SRichard Henderson{ 1986098d0fc1SRichard Henderson TCGAtomAlign aa; 1987098d0fc1SRichard Henderson 1988098d0fc1SRichard Henderson if (!have_movbe) { 1989098d0fc1SRichard Henderson return false; 1990098d0fc1SRichard Henderson } 1991098d0fc1SRichard Henderson if ((memop & MO_SIZE) < MO_128) { 1992098d0fc1SRichard 
Henderson return true; 1993098d0fc1SRichard Henderson } 1994098d0fc1SRichard Henderson 1995098d0fc1SRichard Henderson /* 1996098d0fc1SRichard Henderson * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA, 1997098d0fc1SRichard Henderson * but do allow a pair of 64-bit operations, i.e. MOVBEQ. 1998098d0fc1SRichard Henderson */ 1999098d0fc1SRichard Henderson aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2000098d0fc1SRichard Henderson return aa.atom < MO_128; 20017b880107SRichard Henderson} 20027b880107SRichard Henderson 2003139c1837SPaolo Bonzini/* 2004da8ab70aSRichard Henderson * Because i686 has no register parameters and because x86_64 has xchg 2005da8ab70aSRichard Henderson * to handle addr/data register overlap, we have placed all input arguments 2006da8ab70aSRichard Henderson * before we need might need a scratch reg. 2007da8ab70aSRichard Henderson * 2008da8ab70aSRichard Henderson * Even then, a scratch is only needed for l->raddr. Rather than expose 2009da8ab70aSRichard Henderson * a general-purpose scratch when we don't actually know it's available, 2010da8ab70aSRichard Henderson * use the ra_gen hook to load into RAX if needed. 
2011da8ab70aSRichard Henderson */ 2012da8ab70aSRichard Henderson#if TCG_TARGET_REG_BITS == 64 2013da8ab70aSRichard Hendersonstatic TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2014da8ab70aSRichard Henderson{ 2015da8ab70aSRichard Henderson if (arg < 0) { 2016da8ab70aSRichard Henderson arg = TCG_REG_RAX; 2017da8ab70aSRichard Henderson } 2018da8ab70aSRichard Henderson tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr); 2019da8ab70aSRichard Henderson return arg; 2020da8ab70aSRichard Henderson} 2021da8ab70aSRichard Hendersonstatic const TCGLdstHelperParam ldst_helper_param = { 2022da8ab70aSRichard Henderson .ra_gen = ldst_ra_gen 2023da8ab70aSRichard Henderson}; 2024da8ab70aSRichard Henderson#else 2025da8ab70aSRichard Hendersonstatic const TCGLdstHelperParam ldst_helper_param = { }; 2026da8ab70aSRichard Henderson#endif 2027da8ab70aSRichard Henderson 2028098d0fc1SRichard Hendersonstatic void tcg_out_vec_to_pair(TCGContext *s, TCGType type, 2029098d0fc1SRichard Henderson TCGReg l, TCGReg h, TCGReg v) 2030098d0fc1SRichard Henderson{ 2031098d0fc1SRichard Henderson int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; 2032098d0fc1SRichard Henderson 2033098d0fc1SRichard Henderson /* vpmov{d,q} %v, %l */ 2034098d0fc1SRichard Henderson tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l); 2035098d0fc1SRichard Henderson /* vpextr{d,q} $1, %v, %h */ 2036098d0fc1SRichard Henderson tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h); 2037098d0fc1SRichard Henderson tcg_out8(s, 1); 2038098d0fc1SRichard Henderson} 2039098d0fc1SRichard Henderson 2040098d0fc1SRichard Hendersonstatic void tcg_out_pair_to_vec(TCGContext *s, TCGType type, 2041098d0fc1SRichard Henderson TCGReg v, TCGReg l, TCGReg h) 2042098d0fc1SRichard Henderson{ 2043098d0fc1SRichard Henderson int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; 2044098d0fc1SRichard Henderson 2045098d0fc1SRichard Henderson /* vmov{d,q} %l, %v */ 2046098d0fc1SRichard Henderson tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l); 2047098d0fc1SRichard Henderson /* vpinsr{d,q} $1, %h, %v, %v */ 2048098d0fc1SRichard Henderson tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h); 2049098d0fc1SRichard Henderson tcg_out8(s, 1); 2050098d0fc1SRichard Henderson} 2051098d0fc1SRichard Henderson 2052da8ab70aSRichard Henderson/* 2053139c1837SPaolo Bonzini * Generate code for the slow path for a load at the end of block 2054139c1837SPaolo Bonzini */ 2055139c1837SPaolo Bonzinistatic bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 2056139c1837SPaolo Bonzini{ 2057da8ab70aSRichard Henderson MemOp opc = get_memop(l->oi); 2058139c1837SPaolo Bonzini tcg_insn_unit **label_ptr = &l->label_ptr[0]; 2059139c1837SPaolo Bonzini 2060139c1837SPaolo Bonzini /* resolve label address */ 2061139c1837SPaolo Bonzini tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); 206230cc7a7eSRichard Henderson if (label_ptr[1]) { 2063139c1837SPaolo Bonzini tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); 2064139c1837SPaolo Bonzini } 2065139c1837SPaolo Bonzini 2066da8ab70aSRichard Henderson tcg_out_ld_helper_args(s, l, &ldst_helper_param); 20670cadc1edSRichard Henderson tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]); 2068da8ab70aSRichard Henderson tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); 2069139c1837SPaolo Bonzini 2070139c1837SPaolo Bonzini tcg_out_jmp(s, l->raddr); 2071139c1837SPaolo Bonzini return true; 2072139c1837SPaolo Bonzini} 2073139c1837SPaolo Bonzini 2074139c1837SPaolo Bonzini/* 2075139c1837SPaolo Bonzini * Generate code for the slow path for a store at the end of block 2076139c1837SPaolo Bonzini */ 2077139c1837SPaolo Bonzinistatic bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 2078139c1837SPaolo Bonzini{ 20790036e54eSRichard Henderson MemOp opc = get_memop(l->oi); 
2080139c1837SPaolo Bonzini tcg_insn_unit **label_ptr = &l->label_ptr[0]; 2081139c1837SPaolo Bonzini 2082139c1837SPaolo Bonzini /* resolve label address */ 2083139c1837SPaolo Bonzini tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); 208430cc7a7eSRichard Henderson if (label_ptr[1]) { 2085139c1837SPaolo Bonzini tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); 2086139c1837SPaolo Bonzini } 2087139c1837SPaolo Bonzini 20880036e54eSRichard Henderson tcg_out_st_helper_args(s, l, &ldst_helper_param); 20890cadc1edSRichard Henderson tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]); 2090139c1837SPaolo Bonzini 20910036e54eSRichard Henderson tcg_out_jmp(s, l->raddr); 2092139c1837SPaolo Bonzini return true; 2093139c1837SPaolo Bonzini} 2094b1ee3c67SRichard Henderson 2095915e1d52SRichard Henderson#ifdef CONFIG_USER_ONLY 209661713c29SRichard Hendersonstatic HostAddress x86_guest_base = { 209761713c29SRichard Henderson .index = -1 209861713c29SRichard Henderson}; 209961713c29SRichard Henderson 2100139c1837SPaolo Bonzini#if defined(__x86_64__) && defined(__linux__) 2101139c1837SPaolo Bonzini# include <asm/prctl.h> 2102139c1837SPaolo Bonzini# include <sys/prctl.h> 2103139c1837SPaolo Bonziniint arch_prctl(int code, unsigned long addr); 2104139c1837SPaolo Bonzinistatic inline int setup_guest_base_seg(void) 2105139c1837SPaolo Bonzini{ 2106139c1837SPaolo Bonzini if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { 2107139c1837SPaolo Bonzini return P_GS; 2108139c1837SPaolo Bonzini } 2109139c1837SPaolo Bonzini return 0; 2110139c1837SPaolo Bonzini} 2111915e1d52SRichard Henderson#define setup_guest_base_seg setup_guest_base_seg 211261713c29SRichard Henderson#elif defined(__x86_64__) && \ 211361713c29SRichard Henderson (defined (__FreeBSD__) || defined (__FreeBSD_kernel__)) 2114139c1837SPaolo Bonzini# include <machine/sysarch.h> 2115139c1837SPaolo Bonzinistatic inline int setup_guest_base_seg(void) 2116139c1837SPaolo Bonzini{ 2117139c1837SPaolo Bonzini if 
(sysarch(AMD64_SET_GSBASE, &guest_base) == 0) { 2118139c1837SPaolo Bonzini return P_GS; 2119139c1837SPaolo Bonzini } 2120139c1837SPaolo Bonzini return 0; 2121139c1837SPaolo Bonzini} 2122915e1d52SRichard Henderson#define setup_guest_base_seg setup_guest_base_seg 2123915e1d52SRichard Henderson#endif 2124139c1837SPaolo Bonzini#else 2125915e1d52SRichard Henderson# define x86_guest_base (*(HostAddress *)({ qemu_build_not_reached(); NULL; })) 2126915e1d52SRichard Henderson#endif /* CONFIG_USER_ONLY */ 2127915e1d52SRichard Henderson#ifndef setup_guest_base_seg 2128915e1d52SRichard Henderson# define setup_guest_base_seg() 0 2129915e1d52SRichard Henderson#endif 2130139c1837SPaolo Bonzini 2131d0a9bb5eSRichard Henderson#define MIN_TLB_MASK_TABLE_OFS INT_MIN 2132d0a9bb5eSRichard Henderson 2133530074c6SRichard Henderson/* 2134530074c6SRichard Henderson * For softmmu, perform the TLB load and compare. 2135530074c6SRichard Henderson * For useronly, perform any required alignment tests. 2136530074c6SRichard Henderson * In both cases, return a TCGLabelQemuLdst structure if the slow path 2137530074c6SRichard Henderson * is required and fill in @h with the host address for the fast path. 
2138530074c6SRichard Henderson */ 2139530074c6SRichard Hendersonstatic TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2140530074c6SRichard Henderson TCGReg addrlo, TCGReg addrhi, 2141530074c6SRichard Henderson MemOpIdx oi, bool is_ld) 2142530074c6SRichard Henderson{ 2143530074c6SRichard Henderson TCGLabelQemuLdst *ldst = NULL; 2144530074c6SRichard Henderson MemOp opc = get_memop(oi); 2145098d0fc1SRichard Henderson MemOp s_bits = opc & MO_SIZE; 21461c5322d9SRichard Henderson unsigned a_mask; 21471c5322d9SRichard Henderson 2148915e1d52SRichard Henderson if (tcg_use_softmmu) { 21491c5322d9SRichard Henderson h->index = TCG_REG_L0; 21501c5322d9SRichard Henderson h->ofs = 0; 21511c5322d9SRichard Henderson h->seg = 0; 2152915e1d52SRichard Henderson } else { 21531c5322d9SRichard Henderson *h = x86_guest_base; 2154915e1d52SRichard Henderson } 21551c5322d9SRichard Henderson h->base = addrlo; 2156098d0fc1SRichard Henderson h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); 21571c5322d9SRichard Henderson a_mask = (1 << h->aa.align) - 1; 2158530074c6SRichard Henderson 2159915e1d52SRichard Henderson if (tcg_use_softmmu) { 2160530074c6SRichard Henderson int cmp_ofs = is_ld ? 
offsetof(CPUTLBEntry, addr_read) 2161530074c6SRichard Henderson : offsetof(CPUTLBEntry, addr_write); 2162530074c6SRichard Henderson TCGType ttype = TCG_TYPE_I32; 2163530074c6SRichard Henderson TCGType tlbtype = TCG_TYPE_I32; 2164530074c6SRichard Henderson int trexw = 0, hrexw = 0, tlbrexw = 0; 2165530074c6SRichard Henderson unsigned mem_index = get_mmuidx(oi); 2166530074c6SRichard Henderson unsigned s_mask = (1 << s_bits) - 1; 2167d0a9bb5eSRichard Henderson int fast_ofs = tlb_mask_table_ofs(s, mem_index); 2168c60ad6e3SRichard Henderson int tlb_mask; 2169530074c6SRichard Henderson 2170530074c6SRichard Henderson ldst = new_ldst_label(s); 2171530074c6SRichard Henderson ldst->is_ld = is_ld; 2172530074c6SRichard Henderson ldst->oi = oi; 2173530074c6SRichard Henderson ldst->addrlo_reg = addrlo; 2174530074c6SRichard Henderson ldst->addrhi_reg = addrhi; 2175530074c6SRichard Henderson 2176530074c6SRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 217763f4da91SRichard Henderson ttype = s->addr_type; 217863f4da91SRichard Henderson trexw = (ttype == TCG_TYPE_I32 ? 
0 : P_REXW); 2179530074c6SRichard Henderson if (TCG_TYPE_PTR == TCG_TYPE_I64) { 2180530074c6SRichard Henderson hrexw = P_REXW; 2181a66efde1SRichard Henderson if (s->page_bits + s->tlb_dyn_max_bits > 32) { 2182530074c6SRichard Henderson tlbtype = TCG_TYPE_I64; 2183530074c6SRichard Henderson tlbrexw = P_REXW; 2184530074c6SRichard Henderson } 2185530074c6SRichard Henderson } 2186530074c6SRichard Henderson } 2187530074c6SRichard Henderson 2188530074c6SRichard Henderson tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); 2189530074c6SRichard Henderson tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, 2190aece72b7SRichard Henderson s->page_bits - CPU_TLB_ENTRY_BITS); 2191530074c6SRichard Henderson 2192530074c6SRichard Henderson tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, 2193d0a9bb5eSRichard Henderson fast_ofs + offsetof(CPUTLBDescFast, mask)); 2194530074c6SRichard Henderson 2195530074c6SRichard Henderson tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, 2196d0a9bb5eSRichard Henderson fast_ofs + offsetof(CPUTLBDescFast, table)); 2197530074c6SRichard Henderson 2198530074c6SRichard Henderson /* 2199915e1d52SRichard Henderson * If the required alignment is at least as large as the access, 2200915e1d52SRichard Henderson * simply copy the address and mask. For lesser alignments, 2201915e1d52SRichard Henderson * check that we don't cross pages for the complete access. 
2202530074c6SRichard Henderson */ 22031c5322d9SRichard Henderson if (a_mask >= s_mask) { 2204530074c6SRichard Henderson tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); 2205530074c6SRichard Henderson } else { 2206530074c6SRichard Henderson tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, 2207530074c6SRichard Henderson addrlo, s_mask - a_mask); 2208530074c6SRichard Henderson } 2209aece72b7SRichard Henderson tlb_mask = s->page_mask | a_mask; 2210530074c6SRichard Henderson tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); 2211530074c6SRichard Henderson 2212530074c6SRichard Henderson /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ 2213530074c6SRichard Henderson tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, 2214530074c6SRichard Henderson TCG_REG_L1, TCG_REG_L0, cmp_ofs); 2215530074c6SRichard Henderson 2216530074c6SRichard Henderson /* jne slow_path */ 2217530074c6SRichard Henderson tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); 2218530074c6SRichard Henderson ldst->label_ptr[0] = s->code_ptr; 2219530074c6SRichard Henderson s->code_ptr += 4; 2220530074c6SRichard Henderson 222163f4da91SRichard Henderson if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) { 2222530074c6SRichard Henderson /* cmp 4(TCG_REG_L0), addrhi */ 2223915e1d52SRichard Henderson tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, 2224915e1d52SRichard Henderson TCG_REG_L0, cmp_ofs + 4); 2225530074c6SRichard Henderson 2226530074c6SRichard Henderson /* jne slow_path */ 2227530074c6SRichard Henderson tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); 2228530074c6SRichard Henderson ldst->label_ptr[1] = s->code_ptr; 2229530074c6SRichard Henderson s->code_ptr += 4; 2230530074c6SRichard Henderson } 2231530074c6SRichard Henderson 2232530074c6SRichard Henderson /* TLB Hit. 
*/ 22331fac4648SRichard Henderson tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, 2234530074c6SRichard Henderson offsetof(CPUTLBEntry, addend)); 2235915e1d52SRichard Henderson } else if (a_mask) { 2236303214aaSRichard Henderson int jcc; 2237530074c6SRichard Henderson 2238303214aaSRichard Henderson ldst = new_ldst_label(s); 2239530074c6SRichard Henderson ldst->is_ld = is_ld; 2240530074c6SRichard Henderson ldst->oi = oi; 2241530074c6SRichard Henderson ldst->addrlo_reg = addrlo; 2242530074c6SRichard Henderson ldst->addrhi_reg = addrhi; 2243530074c6SRichard Henderson 2244530074c6SRichard Henderson /* jne slow_path */ 2245303214aaSRichard Henderson jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addrlo, a_mask, true, false); 2246303214aaSRichard Henderson tcg_out_opc(s, OPC_JCC_long + jcc, 0, 0, 0); 2247530074c6SRichard Henderson ldst->label_ptr[0] = s->code_ptr; 2248530074c6SRichard Henderson s->code_ptr += 4; 2249530074c6SRichard Henderson } 2250530074c6SRichard Henderson 2251530074c6SRichard Henderson return ldst; 2252530074c6SRichard Henderson} 2253530074c6SRichard Henderson 2254139c1837SPaolo Bonzinistatic void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, 225561713c29SRichard Henderson HostAddress h, TCGType type, MemOp memop) 2256139c1837SPaolo Bonzini{ 2257d2ef1b83SRichard Henderson bool use_movbe = false; 2258bf12e224SRichard Henderson int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW); 2259139c1837SPaolo Bonzini int movop = OPC_MOVL_GvEv; 2260139c1837SPaolo Bonzini 2261d2ef1b83SRichard Henderson /* Do big-endian loads with movbe. 
*/ 2262d2ef1b83SRichard Henderson if (memop & MO_BSWAP) { 2263d2ef1b83SRichard Henderson tcg_debug_assert(have_movbe); 2264d2ef1b83SRichard Henderson use_movbe = true; 2265139c1837SPaolo Bonzini movop = OPC_MOVBE_GyMy; 2266139c1837SPaolo Bonzini } 2267139c1837SPaolo Bonzini 2268139c1837SPaolo Bonzini switch (memop & MO_SSIZE) { 2269139c1837SPaolo Bonzini case MO_UB: 227061713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVZBL + h.seg, datalo, 227161713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2272139c1837SPaolo Bonzini break; 2273139c1837SPaolo Bonzini case MO_SB: 227461713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + h.seg, datalo, 227561713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2276139c1837SPaolo Bonzini break; 2277139c1837SPaolo Bonzini case MO_UW: 2278d2ef1b83SRichard Henderson if (use_movbe) { 2279d2ef1b83SRichard Henderson /* There is no extending movbe; only low 16-bits are modified. */ 228061713c29SRichard Henderson if (datalo != h.base && datalo != h.index) { 2281d2ef1b83SRichard Henderson /* XOR breaks dependency chains. 
*/ 2282d2ef1b83SRichard Henderson tgen_arithr(s, ARITH_XOR, datalo, datalo); 228361713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, 228461713c29SRichard Henderson datalo, h.base, h.index, 0, h.ofs); 2285139c1837SPaolo Bonzini } else { 228661713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, 228761713c29SRichard Henderson datalo, h.base, h.index, 0, h.ofs); 2288d2ef1b83SRichard Henderson tcg_out_ext16u(s, datalo, datalo); 2289d2ef1b83SRichard Henderson } 2290d2ef1b83SRichard Henderson } else { 229161713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVZWL + h.seg, datalo, 229261713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2293139c1837SPaolo Bonzini } 2294d2ef1b83SRichard Henderson break; 2295d2ef1b83SRichard Henderson case MO_SW: 2296d2ef1b83SRichard Henderson if (use_movbe) { 229761713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, 229861713c29SRichard Henderson datalo, h.base, h.index, 0, h.ofs); 2299753e42eaSRichard Henderson tcg_out_ext16s(s, type, datalo, datalo); 2300139c1837SPaolo Bonzini } else { 230161713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + h.seg, 230261713c29SRichard Henderson datalo, h.base, h.index, 0, h.ofs); 2303139c1837SPaolo Bonzini } 2304139c1837SPaolo Bonzini break; 2305139c1837SPaolo Bonzini case MO_UL: 230661713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, 230761713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2308139c1837SPaolo Bonzini break; 2309139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 2310139c1837SPaolo Bonzini case MO_SL: 2311d2ef1b83SRichard Henderson if (use_movbe) { 231261713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + h.seg, datalo, 231361713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2314139c1837SPaolo Bonzini tcg_out_ext32s(s, datalo, datalo); 2315139c1837SPaolo Bonzini } else { 231661713c29SRichard Henderson 
tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + h.seg, datalo, 231761713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2318139c1837SPaolo Bonzini } 2319139c1837SPaolo Bonzini break; 2320139c1837SPaolo Bonzini#endif 2321fc313c64SFrédéric Pétrot case MO_UQ: 2322139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 232361713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, 232461713c29SRichard Henderson h.base, h.index, 0, h.ofs); 23253174941fSRichard Henderson break; 23263174941fSRichard Henderson } 2327d2ef1b83SRichard Henderson if (use_movbe) { 2328d2ef1b83SRichard Henderson TCGReg t = datalo; 2329139c1837SPaolo Bonzini datalo = datahi; 2330139c1837SPaolo Bonzini datahi = t; 2331139c1837SPaolo Bonzini } 233261713c29SRichard Henderson if (h.base == datalo || h.index == datalo) { 233361713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_LEA, datahi, 233461713c29SRichard Henderson h.base, h.index, 0, h.ofs); 233561713c29SRichard Henderson tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0); 233661713c29SRichard Henderson tcg_out_modrm_offset(s, movop + h.seg, datahi, datahi, 4); 2337139c1837SPaolo Bonzini } else { 233861713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, 233961713c29SRichard Henderson h.base, h.index, 0, h.ofs); 234061713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datahi, 234161713c29SRichard Henderson h.base, h.index, 0, h.ofs + 4); 2342139c1837SPaolo Bonzini } 2343139c1837SPaolo Bonzini break; 2344098d0fc1SRichard Henderson 2345098d0fc1SRichard Henderson case MO_128: 2346098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2347098d0fc1SRichard Henderson 2348098d0fc1SRichard Henderson /* 2349098d0fc1SRichard Henderson * Without 16-byte atomicity, use integer regs. 2350098d0fc1SRichard Henderson * That is where we want the data, and it allows bswaps. 
2351098d0fc1SRichard Henderson */ 2352098d0fc1SRichard Henderson if (h.aa.atom < MO_128) { 2353098d0fc1SRichard Henderson if (use_movbe) { 2354098d0fc1SRichard Henderson TCGReg t = datalo; 2355098d0fc1SRichard Henderson datalo = datahi; 2356098d0fc1SRichard Henderson datahi = t; 2357098d0fc1SRichard Henderson } 2358098d0fc1SRichard Henderson if (h.base == datalo || h.index == datalo) { 2359098d0fc1SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi, 2360098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2361098d0fc1SRichard Henderson tcg_out_modrm_offset(s, movop + P_REXW + h.seg, 2362098d0fc1SRichard Henderson datalo, datahi, 0); 2363098d0fc1SRichard Henderson tcg_out_modrm_offset(s, movop + P_REXW + h.seg, 2364098d0fc1SRichard Henderson datahi, datahi, 8); 2365098d0fc1SRichard Henderson } else { 2366098d0fc1SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, 2367098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2368098d0fc1SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, 2369098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs + 8); 2370098d0fc1SRichard Henderson } 2371098d0fc1SRichard Henderson break; 2372098d0fc1SRichard Henderson } 2373098d0fc1SRichard Henderson 2374098d0fc1SRichard Henderson /* 2375098d0fc1SRichard Henderson * With 16-byte atomicity, a vector load is required. 2376098d0fc1SRichard Henderson * If we already have 16-byte alignment, then VMOVDQA always works. 2377098d0fc1SRichard Henderson * Else if VMOVDQU has atomicity with dynamic alignment, use that. 2378098d0fc1SRichard Henderson * Else use we require a runtime test for alignment for VMOVDQA; 2379098d0fc1SRichard Henderson * use VMOVDQU on the unaligned nonatomic path for simplicity. 
2380098d0fc1SRichard Henderson */ 2381098d0fc1SRichard Henderson if (h.aa.align >= MO_128) { 2382098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, 2383098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2384098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2385098d0fc1SRichard Henderson } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { 2386098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, 2387098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2388098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2389098d0fc1SRichard Henderson } else { 2390098d0fc1SRichard Henderson TCGLabel *l1 = gen_new_label(); 2391098d0fc1SRichard Henderson TCGLabel *l2 = gen_new_label(); 2392303214aaSRichard Henderson int jcc; 2393098d0fc1SRichard Henderson 2394303214aaSRichard Henderson jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); 2395303214aaSRichard Henderson tcg_out_jxx(s, jcc, l1, true); 2396098d0fc1SRichard Henderson 2397098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, 2398098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2399098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2400098d0fc1SRichard Henderson tcg_out_jxx(s, JCC_JMP, l2, true); 2401098d0fc1SRichard Henderson 2402098d0fc1SRichard Henderson tcg_out_label(s, l1); 2403098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, 2404098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2405098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2406098d0fc1SRichard Henderson tcg_out_label(s, l2); 2407098d0fc1SRichard Henderson } 2408098d0fc1SRichard Henderson tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC); 2409098d0fc1SRichard Henderson break; 2410098d0fc1SRichard Henderson 2411139c1837SPaolo Bonzini default: 2412d2ef1b83SRichard Henderson g_assert_not_reached(); 2413139c1837SPaolo Bonzini } 2414139c1837SPaolo Bonzini} 2415139c1837SPaolo Bonzini 2416bf12e224SRichard Hendersonstatic void 
tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2417bf12e224SRichard Henderson TCGReg addrlo, TCGReg addrhi, 2418bf12e224SRichard Henderson MemOpIdx oi, TCGType data_type) 2419139c1837SPaolo Bonzini{ 2420530074c6SRichard Henderson TCGLabelQemuLdst *ldst; 242161713c29SRichard Henderson HostAddress h; 2422bf12e224SRichard Henderson 2423530074c6SRichard Henderson ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2424530074c6SRichard Henderson tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); 2425139c1837SPaolo Bonzini 2426530074c6SRichard Henderson if (ldst) { 2427530074c6SRichard Henderson ldst->type = data_type; 2428530074c6SRichard Henderson ldst->datalo_reg = datalo; 2429530074c6SRichard Henderson ldst->datahi_reg = datahi; 2430530074c6SRichard Henderson ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2431b1ee3c67SRichard Henderson } 2432139c1837SPaolo Bonzini} 2433139c1837SPaolo Bonzini 2434139c1837SPaolo Bonzinistatic void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, 243561713c29SRichard Henderson HostAddress h, MemOp memop) 2436139c1837SPaolo Bonzini{ 2437d2ef1b83SRichard Henderson bool use_movbe = false; 2438139c1837SPaolo Bonzini int movop = OPC_MOVL_EvGv; 2439139c1837SPaolo Bonzini 2440d2ef1b83SRichard Henderson /* 24417893e42dSPhilippe Mathieu-Daudé * Do big-endian stores with movbe or system-mode. 2442d2ef1b83SRichard Henderson * User-only without movbe will have its swapping done generically. 2443d2ef1b83SRichard Henderson */ 2444d2ef1b83SRichard Henderson if (memop & MO_BSWAP) { 2445d2ef1b83SRichard Henderson tcg_debug_assert(have_movbe); 2446d2ef1b83SRichard Henderson use_movbe = true; 2447139c1837SPaolo Bonzini movop = OPC_MOVBE_MyGy; 2448139c1837SPaolo Bonzini } 2449139c1837SPaolo Bonzini 2450139c1837SPaolo Bonzini switch (memop & MO_SIZE) { 2451139c1837SPaolo Bonzini case MO_8: 245207ce0b05SRichard Henderson /* This is handled with constraints on INDEX_op_qemu_st8_i32. 
*/ 245307ce0b05SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); 245461713c29SRichard Henderson tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, 245561713c29SRichard Henderson datalo, h.base, h.index, 0, h.ofs); 2456139c1837SPaolo Bonzini break; 2457139c1837SPaolo Bonzini case MO_16: 245861713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_DATA16 + h.seg, datalo, 245961713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2460139c1837SPaolo Bonzini break; 2461139c1837SPaolo Bonzini case MO_32: 246261713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, 246361713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2464139c1837SPaolo Bonzini break; 2465139c1837SPaolo Bonzini case MO_64: 2466139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 246761713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, 246861713c29SRichard Henderson h.base, h.index, 0, h.ofs); 2469139c1837SPaolo Bonzini } else { 2470d2ef1b83SRichard Henderson if (use_movbe) { 2471d2ef1b83SRichard Henderson TCGReg t = datalo; 2472139c1837SPaolo Bonzini datalo = datahi; 2473139c1837SPaolo Bonzini datahi = t; 2474139c1837SPaolo Bonzini } 247561713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, 247661713c29SRichard Henderson h.base, h.index, 0, h.ofs); 247761713c29SRichard Henderson tcg_out_modrm_sib_offset(s, movop + h.seg, datahi, 247861713c29SRichard Henderson h.base, h.index, 0, h.ofs + 4); 2479139c1837SPaolo Bonzini } 2480139c1837SPaolo Bonzini break; 2481098d0fc1SRichard Henderson 2482098d0fc1SRichard Henderson case MO_128: 2483098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2484098d0fc1SRichard Henderson 2485098d0fc1SRichard Henderson /* 2486098d0fc1SRichard Henderson * Without 16-byte atomicity, use integer regs. 2487098d0fc1SRichard Henderson * That is where we have the data, and it allows bswaps. 
2488098d0fc1SRichard Henderson */ 2489098d0fc1SRichard Henderson if (h.aa.atom < MO_128) { 2490098d0fc1SRichard Henderson if (use_movbe) { 2491098d0fc1SRichard Henderson TCGReg t = datalo; 2492098d0fc1SRichard Henderson datalo = datahi; 2493098d0fc1SRichard Henderson datahi = t; 2494098d0fc1SRichard Henderson } 2495098d0fc1SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, 2496098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2497098d0fc1SRichard Henderson tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, 2498098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs + 8); 2499098d0fc1SRichard Henderson break; 2500098d0fc1SRichard Henderson } 2501098d0fc1SRichard Henderson 2502098d0fc1SRichard Henderson /* 2503098d0fc1SRichard Henderson * With 16-byte atomicity, a vector store is required. 2504098d0fc1SRichard Henderson * If we already have 16-byte alignment, then VMOVDQA always works. 2505098d0fc1SRichard Henderson * Else if VMOVDQU has atomicity with dynamic alignment, use that. 2506098d0fc1SRichard Henderson * Else use we require a runtime test for alignment for VMOVDQA; 2507098d0fc1SRichard Henderson * use VMOVDQU on the unaligned nonatomic path for simplicity. 
2508098d0fc1SRichard Henderson */ 2509098d0fc1SRichard Henderson tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi); 2510098d0fc1SRichard Henderson if (h.aa.align >= MO_128) { 2511098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, 2512098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2513098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2514098d0fc1SRichard Henderson } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { 2515098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, 2516098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2517098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2518098d0fc1SRichard Henderson } else { 2519098d0fc1SRichard Henderson TCGLabel *l1 = gen_new_label(); 2520098d0fc1SRichard Henderson TCGLabel *l2 = gen_new_label(); 2521303214aaSRichard Henderson int jcc; 2522098d0fc1SRichard Henderson 2523303214aaSRichard Henderson jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); 2524303214aaSRichard Henderson tcg_out_jxx(s, jcc, l1, true); 2525098d0fc1SRichard Henderson 2526098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, 2527098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2528098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2529098d0fc1SRichard Henderson tcg_out_jxx(s, JCC_JMP, l2, true); 2530098d0fc1SRichard Henderson 2531098d0fc1SRichard Henderson tcg_out_label(s, l1); 2532098d0fc1SRichard Henderson tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, 2533098d0fc1SRichard Henderson TCG_TMP_VEC, 0, 2534098d0fc1SRichard Henderson h.base, h.index, 0, h.ofs); 2535098d0fc1SRichard Henderson tcg_out_label(s, l2); 2536098d0fc1SRichard Henderson } 2537098d0fc1SRichard Henderson break; 2538098d0fc1SRichard Henderson 2539139c1837SPaolo Bonzini default: 2540d2ef1b83SRichard Henderson g_assert_not_reached(); 2541139c1837SPaolo Bonzini } 2542139c1837SPaolo Bonzini} 2543139c1837SPaolo Bonzini 2544bf12e224SRichard Hendersonstatic void 
tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2545bf12e224SRichard Henderson TCGReg addrlo, TCGReg addrhi, 2546bf12e224SRichard Henderson MemOpIdx oi, TCGType data_type) 2547139c1837SPaolo Bonzini{ 2548530074c6SRichard Henderson TCGLabelQemuLdst *ldst; 254961713c29SRichard Henderson HostAddress h; 2550bf12e224SRichard Henderson 2551530074c6SRichard Henderson ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2552530074c6SRichard Henderson tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); 2553139c1837SPaolo Bonzini 2554530074c6SRichard Henderson if (ldst) { 2555530074c6SRichard Henderson ldst->type = data_type; 2556530074c6SRichard Henderson ldst->datalo_reg = datalo; 2557530074c6SRichard Henderson ldst->datahi_reg = datahi; 2558530074c6SRichard Henderson ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2559b1ee3c67SRichard Henderson } 2560139c1837SPaolo Bonzini} 2561139c1837SPaolo Bonzini 2562b55a8d9dSRichard Hendersonstatic void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 2563b55a8d9dSRichard Henderson{ 2564b55a8d9dSRichard Henderson /* Reuse the zeroing that exists for goto_ptr. 
*/ 2565b55a8d9dSRichard Henderson if (a0 == 0) { 2566b55a8d9dSRichard Henderson tcg_out_jmp(s, tcg_code_gen_epilogue); 2567b55a8d9dSRichard Henderson } else { 2568b55a8d9dSRichard Henderson tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); 2569b55a8d9dSRichard Henderson tcg_out_jmp(s, tb_ret_addr); 2570b55a8d9dSRichard Henderson } 2571b55a8d9dSRichard Henderson} 2572b55a8d9dSRichard Henderson 2573cf7d6b8eSRichard Hendersonstatic void tcg_out_goto_tb(TCGContext *s, int which) 2574cf7d6b8eSRichard Henderson{ 2575cf7d6b8eSRichard Henderson /* 2576cf7d6b8eSRichard Henderson * Jump displacement must be aligned for atomic patching; 2577cf7d6b8eSRichard Henderson * see if we need to add extra nops before jump 2578cf7d6b8eSRichard Henderson */ 2579cf7d6b8eSRichard Henderson int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; 2580cf7d6b8eSRichard Henderson if (gap != 1) { 2581cf7d6b8eSRichard Henderson tcg_out_nopn(s, gap - 1); 2582cf7d6b8eSRichard Henderson } 2583cf7d6b8eSRichard Henderson tcg_out8(s, OPC_JMP_long); /* jmp im */ 2584cf7d6b8eSRichard Henderson set_jmp_insn_offset(s, which); 2585cf7d6b8eSRichard Henderson tcg_out32(s, 0); 2586cf7d6b8eSRichard Henderson set_jmp_reset_offset(s, which); 2587cf7d6b8eSRichard Henderson} 2588cf7d6b8eSRichard Henderson 25890fe1c98dSRichard Hendersonvoid tb_target_set_jmp_target(const TranslationBlock *tb, int n, 25900fe1c98dSRichard Henderson uintptr_t jmp_rx, uintptr_t jmp_rw) 25910fe1c98dSRichard Henderson{ 25920fe1c98dSRichard Henderson /* patch the branch destination */ 25930fe1c98dSRichard Henderson uintptr_t addr = tb->jmp_target_addr[n]; 25940fe1c98dSRichard Henderson qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); 25950fe1c98dSRichard Henderson /* no need to flush icache explicitly */ 25960fe1c98dSRichard Henderson} 25970fe1c98dSRichard Henderson 2598139c1837SPaolo Bonzinistatic inline void tcg_out_op(TCGContext *s, TCGOpcode opc, 25995e8892dbSMiroslav Rezanina const TCGArg args[TCG_MAX_OP_ARGS], 
26005e8892dbSMiroslav Rezanina const int const_args[TCG_MAX_OP_ARGS]) 2601139c1837SPaolo Bonzini{ 2602139c1837SPaolo Bonzini TCGArg a0, a1, a2; 2603139c1837SPaolo Bonzini int c, const_a2, vexop, rexw = 0; 2604139c1837SPaolo Bonzini 2605139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 2606139c1837SPaolo Bonzini# define OP_32_64(x) \ 2607139c1837SPaolo Bonzini case glue(glue(INDEX_op_, x), _i64): \ 2608139c1837SPaolo Bonzini rexw = P_REXW; /* FALLTHRU */ \ 2609139c1837SPaolo Bonzini case glue(glue(INDEX_op_, x), _i32) 2610139c1837SPaolo Bonzini#else 2611139c1837SPaolo Bonzini# define OP_32_64(x) \ 2612139c1837SPaolo Bonzini case glue(glue(INDEX_op_, x), _i32) 2613139c1837SPaolo Bonzini#endif 2614139c1837SPaolo Bonzini 2615139c1837SPaolo Bonzini /* Hoist the loads of the most common arguments. */ 2616139c1837SPaolo Bonzini a0 = args[0]; 2617139c1837SPaolo Bonzini a1 = args[1]; 2618139c1837SPaolo Bonzini a2 = args[2]; 2619139c1837SPaolo Bonzini const_a2 = const_args[2]; 2620139c1837SPaolo Bonzini 2621139c1837SPaolo Bonzini switch (opc) { 2622139c1837SPaolo Bonzini case INDEX_op_goto_ptr: 2623139c1837SPaolo Bonzini /* jmp to the given host address (could be epilogue) */ 2624139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); 2625139c1837SPaolo Bonzini break; 2626139c1837SPaolo Bonzini case INDEX_op_br: 2627139c1837SPaolo Bonzini tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); 2628139c1837SPaolo Bonzini break; 2629139c1837SPaolo Bonzini OP_32_64(ld8u): 2630139c1837SPaolo Bonzini /* Note that we can ignore REXW for the zero-extend to 64-bit. */ 2631139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); 2632139c1837SPaolo Bonzini break; 2633139c1837SPaolo Bonzini OP_32_64(ld8s): 2634139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2); 2635139c1837SPaolo Bonzini break; 2636139c1837SPaolo Bonzini OP_32_64(ld16u): 2637139c1837SPaolo Bonzini /* Note that we can ignore REXW for the zero-extend to 64-bit. 
*/ 2638139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2); 2639139c1837SPaolo Bonzini break; 2640139c1837SPaolo Bonzini OP_32_64(ld16s): 2641139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2); 2642139c1837SPaolo Bonzini break; 2643139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 2644139c1837SPaolo Bonzini case INDEX_op_ld32u_i64: 2645139c1837SPaolo Bonzini#endif 2646139c1837SPaolo Bonzini case INDEX_op_ld_i32: 2647139c1837SPaolo Bonzini tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2); 2648139c1837SPaolo Bonzini break; 2649139c1837SPaolo Bonzini 2650139c1837SPaolo Bonzini OP_32_64(st8): 2651139c1837SPaolo Bonzini if (const_args[0]) { 2652139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); 2653139c1837SPaolo Bonzini tcg_out8(s, a0); 2654139c1837SPaolo Bonzini } else { 2655139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2); 2656139c1837SPaolo Bonzini } 2657139c1837SPaolo Bonzini break; 2658139c1837SPaolo Bonzini OP_32_64(st16): 2659139c1837SPaolo Bonzini if (const_args[0]) { 2660139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2); 2661139c1837SPaolo Bonzini tcg_out16(s, a0); 2662139c1837SPaolo Bonzini } else { 2663139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2); 2664139c1837SPaolo Bonzini } 2665139c1837SPaolo Bonzini break; 2666139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 2667139c1837SPaolo Bonzini case INDEX_op_st32_i64: 2668139c1837SPaolo Bonzini#endif 2669139c1837SPaolo Bonzini case INDEX_op_st_i32: 2670139c1837SPaolo Bonzini if (const_args[0]) { 2671139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2); 2672139c1837SPaolo Bonzini tcg_out32(s, a0); 2673139c1837SPaolo Bonzini } else { 2674139c1837SPaolo Bonzini tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2); 2675139c1837SPaolo Bonzini } 2676139c1837SPaolo Bonzini break; 2677139c1837SPaolo Bonzini 2678139c1837SPaolo Bonzini 
OP_32_64(add): 2679139c1837SPaolo Bonzini /* For 3-operand addition, use LEA. */ 2680139c1837SPaolo Bonzini if (a0 != a1) { 2681139c1837SPaolo Bonzini TCGArg c3 = 0; 2682139c1837SPaolo Bonzini if (const_a2) { 2683139c1837SPaolo Bonzini c3 = a2, a2 = -1; 2684139c1837SPaolo Bonzini } else if (a0 == a2) { 2685139c1837SPaolo Bonzini /* Watch out for dest = src + dest, since we've removed 2686139c1837SPaolo Bonzini the matching constraint on the add. */ 2687139c1837SPaolo Bonzini tgen_arithr(s, ARITH_ADD + rexw, a0, a1); 2688139c1837SPaolo Bonzini break; 2689139c1837SPaolo Bonzini } 2690139c1837SPaolo Bonzini 2691139c1837SPaolo Bonzini tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); 2692139c1837SPaolo Bonzini break; 2693139c1837SPaolo Bonzini } 2694139c1837SPaolo Bonzini c = ARITH_ADD; 2695139c1837SPaolo Bonzini goto gen_arith; 2696139c1837SPaolo Bonzini OP_32_64(sub): 2697139c1837SPaolo Bonzini c = ARITH_SUB; 2698139c1837SPaolo Bonzini goto gen_arith; 2699139c1837SPaolo Bonzini OP_32_64(and): 2700139c1837SPaolo Bonzini c = ARITH_AND; 2701139c1837SPaolo Bonzini goto gen_arith; 2702139c1837SPaolo Bonzini OP_32_64(or): 2703139c1837SPaolo Bonzini c = ARITH_OR; 2704139c1837SPaolo Bonzini goto gen_arith; 2705139c1837SPaolo Bonzini OP_32_64(xor): 2706139c1837SPaolo Bonzini c = ARITH_XOR; 2707139c1837SPaolo Bonzini goto gen_arith; 2708139c1837SPaolo Bonzini gen_arith: 2709139c1837SPaolo Bonzini if (const_a2) { 2710139c1837SPaolo Bonzini tgen_arithi(s, c + rexw, a0, a2, 0); 2711139c1837SPaolo Bonzini } else { 2712139c1837SPaolo Bonzini tgen_arithr(s, c + rexw, a0, a2); 2713139c1837SPaolo Bonzini } 2714139c1837SPaolo Bonzini break; 2715139c1837SPaolo Bonzini 2716139c1837SPaolo Bonzini OP_32_64(andc): 2717139c1837SPaolo Bonzini if (const_a2) { 2718139c1837SPaolo Bonzini tcg_out_mov(s, rexw ? 
TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); 2719139c1837SPaolo Bonzini tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0); 2720139c1837SPaolo Bonzini } else { 2721139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); 2722139c1837SPaolo Bonzini } 2723139c1837SPaolo Bonzini break; 2724139c1837SPaolo Bonzini 2725139c1837SPaolo Bonzini OP_32_64(mul): 2726139c1837SPaolo Bonzini if (const_a2) { 2727139c1837SPaolo Bonzini int32_t val; 2728139c1837SPaolo Bonzini val = a2; 2729139c1837SPaolo Bonzini if (val == (int8_t)val) { 2730139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); 2731139c1837SPaolo Bonzini tcg_out8(s, val); 2732139c1837SPaolo Bonzini } else { 2733139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); 2734139c1837SPaolo Bonzini tcg_out32(s, val); 2735139c1837SPaolo Bonzini } 2736139c1837SPaolo Bonzini } else { 2737139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); 2738139c1837SPaolo Bonzini } 2739139c1837SPaolo Bonzini break; 2740139c1837SPaolo Bonzini 2741139c1837SPaolo Bonzini OP_32_64(div2): 2742139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); 2743139c1837SPaolo Bonzini break; 2744139c1837SPaolo Bonzini OP_32_64(divu2): 2745139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); 2746139c1837SPaolo Bonzini break; 2747139c1837SPaolo Bonzini 2748139c1837SPaolo Bonzini OP_32_64(shl): 2749139c1837SPaolo Bonzini /* For small constant 3-operand shift, use LEA. 
*/ 2750139c1837SPaolo Bonzini if (const_a2 && a0 != a1 && (a2 - 1) < 3) { 2751139c1837SPaolo Bonzini if (a2 - 1 == 0) { 2752139c1837SPaolo Bonzini /* shl $1,a1,a0 -> lea (a1,a1),a0 */ 2753139c1837SPaolo Bonzini tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); 2754139c1837SPaolo Bonzini } else { 2755139c1837SPaolo Bonzini /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ 2756139c1837SPaolo Bonzini tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); 2757139c1837SPaolo Bonzini } 2758139c1837SPaolo Bonzini break; 2759139c1837SPaolo Bonzini } 2760139c1837SPaolo Bonzini c = SHIFT_SHL; 2761139c1837SPaolo Bonzini vexop = OPC_SHLX; 2762139c1837SPaolo Bonzini goto gen_shift_maybe_vex; 2763139c1837SPaolo Bonzini OP_32_64(shr): 2764139c1837SPaolo Bonzini c = SHIFT_SHR; 2765139c1837SPaolo Bonzini vexop = OPC_SHRX; 2766139c1837SPaolo Bonzini goto gen_shift_maybe_vex; 2767139c1837SPaolo Bonzini OP_32_64(sar): 2768139c1837SPaolo Bonzini c = SHIFT_SAR; 2769139c1837SPaolo Bonzini vexop = OPC_SARX; 2770139c1837SPaolo Bonzini goto gen_shift_maybe_vex; 2771139c1837SPaolo Bonzini OP_32_64(rotl): 2772139c1837SPaolo Bonzini c = SHIFT_ROL; 2773139c1837SPaolo Bonzini goto gen_shift; 2774139c1837SPaolo Bonzini OP_32_64(rotr): 2775139c1837SPaolo Bonzini c = SHIFT_ROR; 2776139c1837SPaolo Bonzini goto gen_shift; 2777139c1837SPaolo Bonzini gen_shift_maybe_vex: 2778139c1837SPaolo Bonzini if (have_bmi2) { 2779139c1837SPaolo Bonzini if (!const_a2) { 2780139c1837SPaolo Bonzini tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1); 2781139c1837SPaolo Bonzini break; 2782139c1837SPaolo Bonzini } 2783139c1837SPaolo Bonzini tcg_out_mov(s, rexw ? 
TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); 2784139c1837SPaolo Bonzini } 2785139c1837SPaolo Bonzini /* FALLTHRU */ 2786139c1837SPaolo Bonzini gen_shift: 2787139c1837SPaolo Bonzini if (const_a2) { 2788139c1837SPaolo Bonzini tcg_out_shifti(s, c + rexw, a0, a2); 2789139c1837SPaolo Bonzini } else { 2790139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0); 2791139c1837SPaolo Bonzini } 2792139c1837SPaolo Bonzini break; 2793139c1837SPaolo Bonzini 2794139c1837SPaolo Bonzini OP_32_64(ctz): 2795139c1837SPaolo Bonzini tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); 2796139c1837SPaolo Bonzini break; 2797139c1837SPaolo Bonzini OP_32_64(clz): 2798139c1837SPaolo Bonzini tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); 2799139c1837SPaolo Bonzini break; 2800139c1837SPaolo Bonzini OP_32_64(ctpop): 2801139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); 2802139c1837SPaolo Bonzini break; 2803139c1837SPaolo Bonzini 2804c359ce75SRichard Henderson OP_32_64(brcond): 2805c359ce75SRichard Henderson tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], 2806c359ce75SRichard Henderson arg_label(args[3]), 0); 2807139c1837SPaolo Bonzini break; 28087ba99a1cSRichard Henderson OP_32_64(setcond): 280995bf306eSRichard Henderson tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, false); 281095bf306eSRichard Henderson break; 281195bf306eSRichard Henderson OP_32_64(negsetcond): 281295bf306eSRichard Henderson tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, true); 2813139c1837SPaolo Bonzini break; 281478ddf0dcSRichard Henderson OP_32_64(movcond): 281578ddf0dcSRichard Henderson tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); 2816139c1837SPaolo Bonzini break; 2817139c1837SPaolo Bonzini 2818139c1837SPaolo Bonzini OP_32_64(bswap16): 28197335a3d6SRichard Henderson if (a2 & TCG_BSWAP_OS) { 28207335a3d6SRichard Henderson /* Output must be sign-extended. 
*/ 28217335a3d6SRichard Henderson if (rexw) { 28227335a3d6SRichard Henderson tcg_out_bswap64(s, a0); 28237335a3d6SRichard Henderson tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); 28247335a3d6SRichard Henderson } else { 28257335a3d6SRichard Henderson tcg_out_bswap32(s, a0); 28267335a3d6SRichard Henderson tcg_out_shifti(s, SHIFT_SAR, a0, 16); 28277335a3d6SRichard Henderson } 28287335a3d6SRichard Henderson } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 28297335a3d6SRichard Henderson /* Output must be zero-extended, but input isn't. */ 28307335a3d6SRichard Henderson tcg_out_bswap32(s, a0); 28317335a3d6SRichard Henderson tcg_out_shifti(s, SHIFT_SHR, a0, 16); 28327335a3d6SRichard Henderson } else { 2833139c1837SPaolo Bonzini tcg_out_rolw_8(s, a0); 28347335a3d6SRichard Henderson } 2835139c1837SPaolo Bonzini break; 2836139c1837SPaolo Bonzini OP_32_64(bswap32): 2837139c1837SPaolo Bonzini tcg_out_bswap32(s, a0); 28387335a3d6SRichard Henderson if (rexw && (a2 & TCG_BSWAP_OS)) { 28397335a3d6SRichard Henderson tcg_out_ext32s(s, a0, a0); 28407335a3d6SRichard Henderson } 2841139c1837SPaolo Bonzini break; 2842139c1837SPaolo Bonzini 2843139c1837SPaolo Bonzini OP_32_64(neg): 2844139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); 2845139c1837SPaolo Bonzini break; 2846139c1837SPaolo Bonzini OP_32_64(not): 2847139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); 2848139c1837SPaolo Bonzini break; 2849139c1837SPaolo Bonzini 2850fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i32: 2851fecccfccSRichard Henderson if (TCG_TARGET_REG_BITS == 32) { 2852bf12e224SRichard Henderson tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); 2853139c1837SPaolo Bonzini break; 2854fecccfccSRichard Henderson } 2855fecccfccSRichard Henderson /* fall through */ 2856fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i32: 2857fecccfccSRichard Henderson tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); 2858fecccfccSRichard Henderson 
break; 2859fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i64: 2860bf12e224SRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 2861bf12e224SRichard Henderson tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); 2862fecccfccSRichard Henderson } else { 2863bf12e224SRichard Henderson tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); 2864fecccfccSRichard Henderson } 2865fecccfccSRichard Henderson break; 2866fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i64: 2867fecccfccSRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 2868fecccfccSRichard Henderson tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); 2869bf12e224SRichard Henderson } else { 2870bf12e224SRichard Henderson tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); 2871bf12e224SRichard Henderson } 2872139c1837SPaolo Bonzini break; 2873098d0fc1SRichard Henderson case INDEX_op_qemu_ld_a32_i128: 2874098d0fc1SRichard Henderson case INDEX_op_qemu_ld_a64_i128: 2875098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2876098d0fc1SRichard Henderson tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); 2877098d0fc1SRichard Henderson break; 2878fecccfccSRichard Henderson 2879fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i32: 2880fecccfccSRichard Henderson case INDEX_op_qemu_st8_a64_i32: 2881fecccfccSRichard Henderson if (TCG_TARGET_REG_BITS == 32) { 2882bf12e224SRichard Henderson tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); 2883139c1837SPaolo Bonzini break; 2884fecccfccSRichard Henderson } 2885fecccfccSRichard Henderson /* fall through */ 2886fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i32: 2887fecccfccSRichard Henderson case INDEX_op_qemu_st8_a32_i32: 2888fecccfccSRichard Henderson tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); 2889fecccfccSRichard Henderson break; 2890fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i64: 2891bf12e224SRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 2892bf12e224SRichard Henderson 
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); 2893fecccfccSRichard Henderson } else { 2894bf12e224SRichard Henderson tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); 2895fecccfccSRichard Henderson } 2896fecccfccSRichard Henderson break; 2897fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i64: 2898fecccfccSRichard Henderson if (TCG_TARGET_REG_BITS == 64) { 2899fecccfccSRichard Henderson tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); 2900bf12e224SRichard Henderson } else { 2901bf12e224SRichard Henderson tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); 2902bf12e224SRichard Henderson } 2903139c1837SPaolo Bonzini break; 2904098d0fc1SRichard Henderson case INDEX_op_qemu_st_a32_i128: 2905098d0fc1SRichard Henderson case INDEX_op_qemu_st_a64_i128: 2906098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2907098d0fc1SRichard Henderson tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); 2908098d0fc1SRichard Henderson break; 2909139c1837SPaolo Bonzini 2910139c1837SPaolo Bonzini OP_32_64(mulu2): 2911139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); 2912139c1837SPaolo Bonzini break; 2913139c1837SPaolo Bonzini OP_32_64(muls2): 2914139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); 2915139c1837SPaolo Bonzini break; 2916139c1837SPaolo Bonzini OP_32_64(add2): 2917139c1837SPaolo Bonzini if (const_args[4]) { 2918139c1837SPaolo Bonzini tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); 2919139c1837SPaolo Bonzini } else { 2920139c1837SPaolo Bonzini tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]); 2921139c1837SPaolo Bonzini } 2922139c1837SPaolo Bonzini if (const_args[5]) { 2923139c1837SPaolo Bonzini tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1); 2924139c1837SPaolo Bonzini } else { 2925139c1837SPaolo Bonzini tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]); 2926139c1837SPaolo Bonzini } 2927139c1837SPaolo Bonzini break; 2928139c1837SPaolo Bonzini OP_32_64(sub2): 
2929139c1837SPaolo Bonzini if (const_args[4]) { 2930139c1837SPaolo Bonzini tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1); 2931139c1837SPaolo Bonzini } else { 2932139c1837SPaolo Bonzini tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]); 2933139c1837SPaolo Bonzini } 2934139c1837SPaolo Bonzini if (const_args[5]) { 2935139c1837SPaolo Bonzini tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1); 2936139c1837SPaolo Bonzini } else { 2937139c1837SPaolo Bonzini tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]); 2938139c1837SPaolo Bonzini } 2939139c1837SPaolo Bonzini break; 2940139c1837SPaolo Bonzini 2941139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32 2942139c1837SPaolo Bonzini case INDEX_op_brcond2_i32: 2943139c1837SPaolo Bonzini tcg_out_brcond2(s, args, const_args, 0); 2944139c1837SPaolo Bonzini break; 2945139c1837SPaolo Bonzini case INDEX_op_setcond2_i32: 2946139c1837SPaolo Bonzini tcg_out_setcond2(s, args, const_args); 2947139c1837SPaolo Bonzini break; 2948139c1837SPaolo Bonzini#else /* TCG_TARGET_REG_BITS == 64 */ 2949139c1837SPaolo Bonzini case INDEX_op_ld32s_i64: 2950139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); 2951139c1837SPaolo Bonzini break; 2952139c1837SPaolo Bonzini case INDEX_op_ld_i64: 2953139c1837SPaolo Bonzini tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2); 2954139c1837SPaolo Bonzini break; 2955139c1837SPaolo Bonzini case INDEX_op_st_i64: 2956139c1837SPaolo Bonzini if (const_args[0]) { 2957139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2); 2958139c1837SPaolo Bonzini tcg_out32(s, a0); 2959139c1837SPaolo Bonzini } else { 2960139c1837SPaolo Bonzini tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2); 2961139c1837SPaolo Bonzini } 2962139c1837SPaolo Bonzini break; 2963139c1837SPaolo Bonzini 2964139c1837SPaolo Bonzini case INDEX_op_bswap64_i64: 2965139c1837SPaolo Bonzini tcg_out_bswap64(s, a0); 2966139c1837SPaolo Bonzini break; 2967139c1837SPaolo Bonzini case INDEX_op_extrh_i64_i32: 2968139c1837SPaolo Bonzini tcg_out_shifti(s, 
SHIFT_SHR + P_REXW, a0, 32); 2969139c1837SPaolo Bonzini break; 2970139c1837SPaolo Bonzini#endif 2971139c1837SPaolo Bonzini 2972139c1837SPaolo Bonzini OP_32_64(deposit): 2973139c1837SPaolo Bonzini if (args[3] == 0 && args[4] == 8) { 2974139c1837SPaolo Bonzini /* load bits 0..7 */ 297573f97f0aSRichard Henderson if (const_a2) { 297673f97f0aSRichard Henderson tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), 297773f97f0aSRichard Henderson 0, a0, 0); 297873f97f0aSRichard Henderson tcg_out8(s, a2); 297973f97f0aSRichard Henderson } else { 2980139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); 298173f97f0aSRichard Henderson } 298236df88c0SRichard Henderson } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) { 2983139c1837SPaolo Bonzini /* load bits 8..15 */ 298473f97f0aSRichard Henderson if (const_a2) { 298573f97f0aSRichard Henderson tcg_out8(s, OPC_MOVB_Ib + a0 + 4); 298673f97f0aSRichard Henderson tcg_out8(s, a2); 298773f97f0aSRichard Henderson } else { 2988139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); 298973f97f0aSRichard Henderson } 2990139c1837SPaolo Bonzini } else if (args[3] == 0 && args[4] == 16) { 2991139c1837SPaolo Bonzini /* load bits 0..15 */ 299273f97f0aSRichard Henderson if (const_a2) { 299373f97f0aSRichard Henderson tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), 299473f97f0aSRichard Henderson 0, a0, 0); 299573f97f0aSRichard Henderson tcg_out16(s, a2); 299673f97f0aSRichard Henderson } else { 2997139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); 299873f97f0aSRichard Henderson } 2999139c1837SPaolo Bonzini } else { 3000732e89f4SRichard Henderson g_assert_not_reached(); 3001139c1837SPaolo Bonzini } 3002139c1837SPaolo Bonzini break; 3003139c1837SPaolo Bonzini 3004139c1837SPaolo Bonzini case INDEX_op_extract_i64: 3005139c1837SPaolo Bonzini if (a2 + args[3] == 32) { 3006139c1837SPaolo Bonzini /* This is a 32-bit zero-extending right shift. 
*/ 3007139c1837SPaolo Bonzini tcg_out_mov(s, TCG_TYPE_I32, a0, a1); 3008139c1837SPaolo Bonzini tcg_out_shifti(s, SHIFT_SHR, a0, a2); 3009139c1837SPaolo Bonzini break; 3010139c1837SPaolo Bonzini } 3011139c1837SPaolo Bonzini /* FALLTHRU */ 3012139c1837SPaolo Bonzini case INDEX_op_extract_i32: 3013139c1837SPaolo Bonzini /* On the off-chance that we can use the high-byte registers. 3014139c1837SPaolo Bonzini Otherwise we emit the same ext16 + shift pattern that we 3015139c1837SPaolo Bonzini would have gotten from the normal tcg-op.c expansion. */ 3016139c1837SPaolo Bonzini tcg_debug_assert(a2 == 8 && args[3] == 8); 3017139c1837SPaolo Bonzini if (a1 < 4 && a0 < 8) { 3018139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); 3019139c1837SPaolo Bonzini } else { 3020139c1837SPaolo Bonzini tcg_out_ext16u(s, a0, a1); 3021139c1837SPaolo Bonzini tcg_out_shifti(s, SHIFT_SHR, a0, 8); 3022139c1837SPaolo Bonzini } 3023139c1837SPaolo Bonzini break; 3024139c1837SPaolo Bonzini 3025139c1837SPaolo Bonzini case INDEX_op_sextract_i32: 3026139c1837SPaolo Bonzini /* We don't implement sextract_i64, as we cannot sign-extend to 3027139c1837SPaolo Bonzini 64-bits without using the REX prefix that explicitly excludes 3028139c1837SPaolo Bonzini access to the high-byte registers. */ 3029139c1837SPaolo Bonzini tcg_debug_assert(a2 == 8 && args[3] == 8); 3030139c1837SPaolo Bonzini if (a1 < 4 && a0 < 8) { 3031139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); 3032139c1837SPaolo Bonzini } else { 3033753e42eaSRichard Henderson tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); 3034139c1837SPaolo Bonzini tcg_out_shifti(s, SHIFT_SAR, a0, 8); 3035139c1837SPaolo Bonzini } 3036139c1837SPaolo Bonzini break; 3037139c1837SPaolo Bonzini 3038139c1837SPaolo Bonzini OP_32_64(extract2): 3039139c1837SPaolo Bonzini /* Note that SHRD outputs to the r/m operand. 
*/ 3040139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); 3041139c1837SPaolo Bonzini tcg_out8(s, args[3]); 3042139c1837SPaolo Bonzini break; 3043139c1837SPaolo Bonzini 3044139c1837SPaolo Bonzini case INDEX_op_mb: 3045139c1837SPaolo Bonzini tcg_out_mb(s, a0); 3046139c1837SPaolo Bonzini break; 3047139c1837SPaolo Bonzini case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3048139c1837SPaolo Bonzini case INDEX_op_mov_i64: 3049139c1837SPaolo Bonzini case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3050b55a8d9dSRichard Henderson case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3051cf7d6b8eSRichard Henderson case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3052678155b2SRichard Henderson case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3053678155b2SRichard Henderson case INDEX_op_ext8s_i64: 3054d0e66c89SRichard Henderson case INDEX_op_ext8u_i32: 3055d0e66c89SRichard Henderson case INDEX_op_ext8u_i64: 3056753e42eaSRichard Henderson case INDEX_op_ext16s_i32: 3057753e42eaSRichard Henderson case INDEX_op_ext16s_i64: 3058379afdffSRichard Henderson case INDEX_op_ext16u_i32: 3059379afdffSRichard Henderson case INDEX_op_ext16u_i64: 306052bf3398SRichard Henderson case INDEX_op_ext32s_i64: 30619ecf5f61SRichard Henderson case INDEX_op_ext32u_i64: 30629c6aa274SRichard Henderson case INDEX_op_ext_i32_i64: 3063b9bfe000SRichard Henderson case INDEX_op_extu_i32_i64: 3064b8b94ac6SRichard Henderson case INDEX_op_extrl_i64_i32: 3065139c1837SPaolo Bonzini default: 3066732e89f4SRichard Henderson g_assert_not_reached(); 3067139c1837SPaolo Bonzini } 3068139c1837SPaolo Bonzini 3069139c1837SPaolo Bonzini#undef OP_32_64 3070139c1837SPaolo Bonzini} 3071139c1837SPaolo Bonzini 3072b8a56703SRichard Hendersonstatic int const umin_insn[4] = { 3073b8a56703SRichard Henderson OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ 3074b8a56703SRichard Henderson}; 3075b8a56703SRichard Henderson 3076b8a56703SRichard 
Hendersonstatic int const umax_insn[4] = { 3077b8a56703SRichard Henderson OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ 3078b8a56703SRichard Henderson}; 3079b8a56703SRichard Henderson 3080b8a56703SRichard Hendersonstatic bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece, 3081b8a56703SRichard Henderson TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond) 3082b8a56703SRichard Henderson{ 3083b8a56703SRichard Henderson static int const cmpeq_insn[4] = { 3084b8a56703SRichard Henderson OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ 3085b8a56703SRichard Henderson }; 3086b8a56703SRichard Henderson static int const cmpgt_insn[4] = { 3087b8a56703SRichard Henderson OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ 3088b8a56703SRichard Henderson }; 3089b8a56703SRichard Henderson 3090b8a56703SRichard Henderson enum { 3091b8a56703SRichard Henderson NEED_INV = 1, 3092b8a56703SRichard Henderson NEED_SWAP = 2, 3093b8a56703SRichard Henderson NEED_UMIN = 4, 3094b8a56703SRichard Henderson NEED_UMAX = 8, 3095b8a56703SRichard Henderson INVALID = 16, 3096b8a56703SRichard Henderson }; 3097b8a56703SRichard Henderson static const uint8_t cond_fixup[16] = { 3098b8a56703SRichard Henderson [0 ... 
15] = INVALID, 3099b8a56703SRichard Henderson [TCG_COND_EQ] = 0, 3100b8a56703SRichard Henderson [TCG_COND_GT] = 0, 3101b8a56703SRichard Henderson [TCG_COND_NE] = NEED_INV, 3102b8a56703SRichard Henderson [TCG_COND_LE] = NEED_INV, 3103b8a56703SRichard Henderson [TCG_COND_LT] = NEED_SWAP, 3104b8a56703SRichard Henderson [TCG_COND_GE] = NEED_SWAP | NEED_INV, 3105b8a56703SRichard Henderson [TCG_COND_LEU] = NEED_UMIN, 3106b8a56703SRichard Henderson [TCG_COND_GTU] = NEED_UMIN | NEED_INV, 3107b8a56703SRichard Henderson [TCG_COND_GEU] = NEED_UMAX, 3108b8a56703SRichard Henderson [TCG_COND_LTU] = NEED_UMAX | NEED_INV, 3109b8a56703SRichard Henderson }; 3110b8a56703SRichard Henderson int fixup = cond_fixup[cond]; 3111b8a56703SRichard Henderson 3112b8a56703SRichard Henderson assert(!(fixup & INVALID)); 3113b8a56703SRichard Henderson 3114b8a56703SRichard Henderson if (fixup & NEED_INV) { 3115b8a56703SRichard Henderson cond = tcg_invert_cond(cond); 3116b8a56703SRichard Henderson } 3117b8a56703SRichard Henderson 3118b8a56703SRichard Henderson if (fixup & NEED_SWAP) { 3119b8a56703SRichard Henderson TCGReg swap = v1; 3120b8a56703SRichard Henderson v1 = v2; 3121b8a56703SRichard Henderson v2 = swap; 3122b8a56703SRichard Henderson cond = tcg_swap_cond(cond); 3123b8a56703SRichard Henderson } 3124b8a56703SRichard Henderson 3125b8a56703SRichard Henderson if (fixup & (NEED_UMIN | NEED_UMAX)) { 3126b8a56703SRichard Henderson int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]); 3127b8a56703SRichard Henderson 3128b8a56703SRichard Henderson /* avx2 does not have 64-bit min/max; adjusted during expand. 
*/ 3129b8a56703SRichard Henderson assert(vece <= MO_32); 3130b8a56703SRichard Henderson 3131b8a56703SRichard Henderson tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type); 3132b8a56703SRichard Henderson v2 = TCG_TMP_VEC; 3133b8a56703SRichard Henderson cond = TCG_COND_EQ; 3134b8a56703SRichard Henderson } 3135b8a56703SRichard Henderson 3136b8a56703SRichard Henderson switch (cond) { 3137b8a56703SRichard Henderson case TCG_COND_EQ: 3138b8a56703SRichard Henderson tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type); 3139b8a56703SRichard Henderson break; 3140b8a56703SRichard Henderson case TCG_COND_GT: 3141b8a56703SRichard Henderson tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type); 3142b8a56703SRichard Henderson break; 3143b8a56703SRichard Henderson default: 3144b8a56703SRichard Henderson g_assert_not_reached(); 3145b8a56703SRichard Henderson } 3146b8a56703SRichard Henderson return fixup & NEED_INV; 3147b8a56703SRichard Henderson} 3148b8a56703SRichard Henderson 3149717da87dSRichard Hendersonstatic void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece, 3150717da87dSRichard Henderson TCGReg v1, TCGReg v2, TCGCond cond) 3151717da87dSRichard Henderson{ 3152717da87dSRichard Henderson static const int cmpm_insn[2][4] = { 3153717da87dSRichard Henderson { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ }, 3154717da87dSRichard Henderson { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ } 3155717da87dSRichard Henderson }; 3156*782cffa4SRichard Henderson static const int testm_insn[4] = { 3157*782cffa4SRichard Henderson OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ 3158*782cffa4SRichard Henderson }; 3159*782cffa4SRichard Henderson static const int testnm_insn[4] = { 3160*782cffa4SRichard Henderson OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ 3161*782cffa4SRichard Henderson }; 3162*782cffa4SRichard Henderson 3163717da87dSRichard Henderson static const int cond_ext[16] = { 3164717da87dSRichard Henderson [TCG_COND_EQ] = 
0, 3165717da87dSRichard Henderson [TCG_COND_NE] = 4, 3166717da87dSRichard Henderson [TCG_COND_LT] = 1, 3167717da87dSRichard Henderson [TCG_COND_LTU] = 1, 3168717da87dSRichard Henderson [TCG_COND_LE] = 2, 3169717da87dSRichard Henderson [TCG_COND_LEU] = 2, 3170717da87dSRichard Henderson [TCG_COND_NEVER] = 3, 3171717da87dSRichard Henderson [TCG_COND_GE] = 5, 3172717da87dSRichard Henderson [TCG_COND_GEU] = 5, 3173717da87dSRichard Henderson [TCG_COND_GT] = 6, 3174717da87dSRichard Henderson [TCG_COND_GTU] = 6, 3175717da87dSRichard Henderson [TCG_COND_ALWAYS] = 7, 3176717da87dSRichard Henderson }; 3177717da87dSRichard Henderson 3178*782cffa4SRichard Henderson switch (cond) { 3179*782cffa4SRichard Henderson case TCG_COND_TSTNE: 3180*782cffa4SRichard Henderson tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type); 3181*782cffa4SRichard Henderson break; 3182*782cffa4SRichard Henderson case TCG_COND_TSTEQ: 3183*782cffa4SRichard Henderson tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type); 3184*782cffa4SRichard Henderson break; 3185*782cffa4SRichard Henderson default: 3186717da87dSRichard Henderson tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece], 3187717da87dSRichard Henderson /* k1 */ 1, v1, v2, type); 3188717da87dSRichard Henderson tcg_out8(s, cond_ext[cond]); 3189*782cffa4SRichard Henderson break; 3190*782cffa4SRichard Henderson } 3191717da87dSRichard Henderson} 3192717da87dSRichard Henderson 3193717da87dSRichard Hendersonstatic void tcg_out_k1_to_vec(TCGContext *s, TCGType type, 3194717da87dSRichard Henderson unsigned vece, TCGReg dest) 3195717da87dSRichard Henderson{ 3196717da87dSRichard Henderson static const int movm_insn[] = { 3197717da87dSRichard Henderson OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q 3198717da87dSRichard Henderson }; 3199717da87dSRichard Henderson tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type); 3200717da87dSRichard Henderson} 3201717da87dSRichard Henderson 
3202b8a56703SRichard Hendersonstatic void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece, 3203b8a56703SRichard Henderson TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond) 3204b8a56703SRichard Henderson{ 3205717da87dSRichard Henderson /* 3206717da87dSRichard Henderson * With avx512, we have a complete set of comparisons into mask. 3207717da87dSRichard Henderson * Unless there's a single insn expansion for the comparision, 3208717da87dSRichard Henderson * expand via a mask in k1. 3209717da87dSRichard Henderson */ 3210717da87dSRichard Henderson if ((vece <= MO_16 ? have_avx512bw : have_avx512dq) 3211717da87dSRichard Henderson && cond != TCG_COND_EQ 3212717da87dSRichard Henderson && cond != TCG_COND_LT 3213717da87dSRichard Henderson && cond != TCG_COND_GT) { 3214717da87dSRichard Henderson tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond); 3215717da87dSRichard Henderson tcg_out_k1_to_vec(s, type, vece, v0); 3216717da87dSRichard Henderson return; 3217717da87dSRichard Henderson } 3218717da87dSRichard Henderson 3219b8a56703SRichard Henderson if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) { 3220b8a56703SRichard Henderson tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1); 3221b8a56703SRichard Henderson tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type); 3222b8a56703SRichard Henderson } 3223b8a56703SRichard Henderson} 3224b8a56703SRichard Henderson 3225d5896749SRichard Hendersonstatic void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece, 3226d5896749SRichard Henderson TCGReg v0, TCGReg c1, TCGReg c2, 3227d5896749SRichard Henderson TCGReg v3, TCGReg v4, TCGCond cond) 3228d5896749SRichard Henderson{ 3229d5896749SRichard Henderson static const int vpblendm_insn[] = { 3230d5896749SRichard Henderson OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ 3231d5896749SRichard Henderson }; 3232d5896749SRichard Henderson bool z = false; 3233d5896749SRichard Henderson 3234d5896749SRichard Henderson /* Swap to place constant in V4 to 
take advantage of zero-masking. */ 3235d5896749SRichard Henderson if (!v3) { 3236d5896749SRichard Henderson z = true; 3237d5896749SRichard Henderson v3 = v4; 3238d5896749SRichard Henderson cond = tcg_invert_cond(cond); 3239d5896749SRichard Henderson } 3240d5896749SRichard Henderson 3241d5896749SRichard Henderson tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond); 3242d5896749SRichard Henderson tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3, 3243d5896749SRichard Henderson /* k1 */1, z, type); 3244d5896749SRichard Henderson} 3245d5896749SRichard Henderson 3246db4121d2SRichard Hendersonstatic void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece, 3247db4121d2SRichard Henderson TCGReg v0, TCGReg c1, TCGReg c2, 3248db4121d2SRichard Henderson TCGReg v3, TCGReg v4, TCGCond cond) 3249db4121d2SRichard Henderson{ 3250d5896749SRichard Henderson bool inv; 3251d5896749SRichard Henderson 3252d5896749SRichard Henderson if (vece <= MO_16 ? have_avx512bw : have_avx512vl) { 3253d5896749SRichard Henderson tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond); 3254d5896749SRichard Henderson return; 3255d5896749SRichard Henderson } 3256d5896749SRichard Henderson 3257d5896749SRichard Henderson inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond); 3258d8387f0eSRichard Henderson 3259d8387f0eSRichard Henderson /* 3260d8387f0eSRichard Henderson * Since XMM0 is 16, the only way we get 0 into V3 3261d8387f0eSRichard Henderson * is via the constant zero constraint. 
3262d8387f0eSRichard Henderson */ 3263d8387f0eSRichard Henderson if (!v3) { 3264d8387f0eSRichard Henderson if (inv) { 3265d8387f0eSRichard Henderson tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type); 3266d8387f0eSRichard Henderson } else { 3267d8387f0eSRichard Henderson tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type); 3268d8387f0eSRichard Henderson } 3269d8387f0eSRichard Henderson } else { 3270d8387f0eSRichard Henderson if (inv) { 3271db4121d2SRichard Henderson TCGReg swap = v3; 3272db4121d2SRichard Henderson v3 = v4; 3273db4121d2SRichard Henderson v4 = swap; 3274db4121d2SRichard Henderson } 3275db4121d2SRichard Henderson tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type); 3276db4121d2SRichard Henderson tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4); 3277db4121d2SRichard Henderson } 3278d8387f0eSRichard Henderson} 3279db4121d2SRichard Henderson 3280139c1837SPaolo Bonzinistatic void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3281139c1837SPaolo Bonzini unsigned vecl, unsigned vece, 32825e8892dbSMiroslav Rezanina const TCGArg args[TCG_MAX_OP_ARGS], 32835e8892dbSMiroslav Rezanina const int const_args[TCG_MAX_OP_ARGS]) 3284139c1837SPaolo Bonzini{ 3285139c1837SPaolo Bonzini static int const add_insn[4] = { 3286139c1837SPaolo Bonzini OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ 3287139c1837SPaolo Bonzini }; 3288139c1837SPaolo Bonzini static int const ssadd_insn[4] = { 3289139c1837SPaolo Bonzini OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2 3290139c1837SPaolo Bonzini }; 3291139c1837SPaolo Bonzini static int const usadd_insn[4] = { 3292139c1837SPaolo Bonzini OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2 3293139c1837SPaolo Bonzini }; 3294139c1837SPaolo Bonzini static int const sub_insn[4] = { 3295139c1837SPaolo Bonzini OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ 3296139c1837SPaolo Bonzini }; 3297139c1837SPaolo Bonzini static int const sssub_insn[4] = { 3298139c1837SPaolo Bonzini OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2 3299139c1837SPaolo Bonzini }; 
3300139c1837SPaolo Bonzini static int const ussub_insn[4] = { 3301139c1837SPaolo Bonzini OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2 3302139c1837SPaolo Bonzini }; 3303139c1837SPaolo Bonzini static int const mul_insn[4] = { 33044c8b9686SRichard Henderson OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ 3305139c1837SPaolo Bonzini }; 3306139c1837SPaolo Bonzini static int const shift_imm_insn[4] = { 3307139c1837SPaolo Bonzini OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib 3308139c1837SPaolo Bonzini }; 3309139c1837SPaolo Bonzini static int const punpckl_insn[4] = { 3310139c1837SPaolo Bonzini OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ 3311139c1837SPaolo Bonzini }; 3312139c1837SPaolo Bonzini static int const punpckh_insn[4] = { 3313139c1837SPaolo Bonzini OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ 3314139c1837SPaolo Bonzini }; 3315139c1837SPaolo Bonzini static int const packss_insn[4] = { 3316139c1837SPaolo Bonzini OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2 3317139c1837SPaolo Bonzini }; 3318139c1837SPaolo Bonzini static int const packus_insn[4] = { 3319139c1837SPaolo Bonzini OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2 3320139c1837SPaolo Bonzini }; 3321139c1837SPaolo Bonzini static int const smin_insn[4] = { 3322dac1648fSRichard Henderson OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ 3323139c1837SPaolo Bonzini }; 3324139c1837SPaolo Bonzini static int const smax_insn[4] = { 3325dac1648fSRichard Henderson OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ 3326139c1837SPaolo Bonzini }; 3327102cd35cSRichard Henderson static int const rotlv_insn[4] = { 3328102cd35cSRichard Henderson OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ 3329102cd35cSRichard Henderson }; 3330102cd35cSRichard Henderson static int const rotrv_insn[4] = { 3331102cd35cSRichard Henderson OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ 3332102cd35cSRichard Henderson }; 3333139c1837SPaolo Bonzini static int const shlv_insn[4] = { 3334ef77ce0dSRichard Henderson OPC_UD2, OPC_VPSLLVW, 
OPC_VPSLLVD, OPC_VPSLLVQ 3335139c1837SPaolo Bonzini }; 3336139c1837SPaolo Bonzini static int const shrv_insn[4] = { 3337ef77ce0dSRichard Henderson OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ 3338139c1837SPaolo Bonzini }; 3339139c1837SPaolo Bonzini static int const sarv_insn[4] = { 3340ef77ce0dSRichard Henderson OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ 3341139c1837SPaolo Bonzini }; 3342139c1837SPaolo Bonzini static int const shls_insn[4] = { 3343139c1837SPaolo Bonzini OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ 3344139c1837SPaolo Bonzini }; 3345139c1837SPaolo Bonzini static int const shrs_insn[4] = { 3346139c1837SPaolo Bonzini OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ 3347139c1837SPaolo Bonzini }; 3348139c1837SPaolo Bonzini static int const sars_insn[4] = { 334947b331b2SRichard Henderson OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ 3350139c1837SPaolo Bonzini }; 3351965d5d06SRichard Henderson static int const vpshldi_insn[4] = { 3352965d5d06SRichard Henderson OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ 3353965d5d06SRichard Henderson }; 3354965d5d06SRichard Henderson static int const vpshldv_insn[4] = { 3355965d5d06SRichard Henderson OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ 3356965d5d06SRichard Henderson }; 3357965d5d06SRichard Henderson static int const vpshrdv_insn[4] = { 3358965d5d06SRichard Henderson OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ 3359965d5d06SRichard Henderson }; 3360139c1837SPaolo Bonzini static int const abs_insn[4] = { 3361dac1648fSRichard Henderson OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ 3362139c1837SPaolo Bonzini }; 3363139c1837SPaolo Bonzini 3364139c1837SPaolo Bonzini TCGType type = vecl + TCG_TYPE_V64; 3365139c1837SPaolo Bonzini int insn, sub; 3366cf320769SRichard Henderson TCGArg a0, a1, a2, a3; 3367139c1837SPaolo Bonzini 3368139c1837SPaolo Bonzini a0 = args[0]; 3369139c1837SPaolo Bonzini a1 = args[1]; 3370139c1837SPaolo Bonzini a2 = args[2]; 3371139c1837SPaolo Bonzini 3372139c1837SPaolo Bonzini switch (opc) { 
3373139c1837SPaolo Bonzini case INDEX_op_add_vec: 3374139c1837SPaolo Bonzini insn = add_insn[vece]; 3375139c1837SPaolo Bonzini goto gen_simd; 3376139c1837SPaolo Bonzini case INDEX_op_ssadd_vec: 3377139c1837SPaolo Bonzini insn = ssadd_insn[vece]; 3378139c1837SPaolo Bonzini goto gen_simd; 3379139c1837SPaolo Bonzini case INDEX_op_usadd_vec: 3380139c1837SPaolo Bonzini insn = usadd_insn[vece]; 3381139c1837SPaolo Bonzini goto gen_simd; 3382139c1837SPaolo Bonzini case INDEX_op_sub_vec: 3383139c1837SPaolo Bonzini insn = sub_insn[vece]; 3384139c1837SPaolo Bonzini goto gen_simd; 3385139c1837SPaolo Bonzini case INDEX_op_sssub_vec: 3386139c1837SPaolo Bonzini insn = sssub_insn[vece]; 3387139c1837SPaolo Bonzini goto gen_simd; 3388139c1837SPaolo Bonzini case INDEX_op_ussub_vec: 3389139c1837SPaolo Bonzini insn = ussub_insn[vece]; 3390139c1837SPaolo Bonzini goto gen_simd; 3391139c1837SPaolo Bonzini case INDEX_op_mul_vec: 3392139c1837SPaolo Bonzini insn = mul_insn[vece]; 3393139c1837SPaolo Bonzini goto gen_simd; 3394139c1837SPaolo Bonzini case INDEX_op_and_vec: 3395139c1837SPaolo Bonzini insn = OPC_PAND; 3396139c1837SPaolo Bonzini goto gen_simd; 3397139c1837SPaolo Bonzini case INDEX_op_or_vec: 3398139c1837SPaolo Bonzini insn = OPC_POR; 3399139c1837SPaolo Bonzini goto gen_simd; 3400139c1837SPaolo Bonzini case INDEX_op_xor_vec: 3401139c1837SPaolo Bonzini insn = OPC_PXOR; 3402139c1837SPaolo Bonzini goto gen_simd; 3403139c1837SPaolo Bonzini case INDEX_op_smin_vec: 3404139c1837SPaolo Bonzini insn = smin_insn[vece]; 3405139c1837SPaolo Bonzini goto gen_simd; 3406139c1837SPaolo Bonzini case INDEX_op_umin_vec: 3407139c1837SPaolo Bonzini insn = umin_insn[vece]; 3408139c1837SPaolo Bonzini goto gen_simd; 3409139c1837SPaolo Bonzini case INDEX_op_smax_vec: 3410139c1837SPaolo Bonzini insn = smax_insn[vece]; 3411139c1837SPaolo Bonzini goto gen_simd; 3412139c1837SPaolo Bonzini case INDEX_op_umax_vec: 3413139c1837SPaolo Bonzini insn = umax_insn[vece]; 3414139c1837SPaolo Bonzini goto gen_simd; 
3415139c1837SPaolo Bonzini case INDEX_op_shlv_vec: 3416139c1837SPaolo Bonzini insn = shlv_insn[vece]; 3417139c1837SPaolo Bonzini goto gen_simd; 3418139c1837SPaolo Bonzini case INDEX_op_shrv_vec: 3419139c1837SPaolo Bonzini insn = shrv_insn[vece]; 3420139c1837SPaolo Bonzini goto gen_simd; 3421139c1837SPaolo Bonzini case INDEX_op_sarv_vec: 3422139c1837SPaolo Bonzini insn = sarv_insn[vece]; 3423139c1837SPaolo Bonzini goto gen_simd; 3424102cd35cSRichard Henderson case INDEX_op_rotlv_vec: 3425102cd35cSRichard Henderson insn = rotlv_insn[vece]; 3426102cd35cSRichard Henderson goto gen_simd; 3427102cd35cSRichard Henderson case INDEX_op_rotrv_vec: 3428102cd35cSRichard Henderson insn = rotrv_insn[vece]; 3429102cd35cSRichard Henderson goto gen_simd; 3430139c1837SPaolo Bonzini case INDEX_op_shls_vec: 3431139c1837SPaolo Bonzini insn = shls_insn[vece]; 3432139c1837SPaolo Bonzini goto gen_simd; 3433139c1837SPaolo Bonzini case INDEX_op_shrs_vec: 3434139c1837SPaolo Bonzini insn = shrs_insn[vece]; 3435139c1837SPaolo Bonzini goto gen_simd; 3436139c1837SPaolo Bonzini case INDEX_op_sars_vec: 3437139c1837SPaolo Bonzini insn = sars_insn[vece]; 3438139c1837SPaolo Bonzini goto gen_simd; 3439139c1837SPaolo Bonzini case INDEX_op_x86_punpckl_vec: 3440139c1837SPaolo Bonzini insn = punpckl_insn[vece]; 3441139c1837SPaolo Bonzini goto gen_simd; 3442139c1837SPaolo Bonzini case INDEX_op_x86_punpckh_vec: 3443139c1837SPaolo Bonzini insn = punpckh_insn[vece]; 3444139c1837SPaolo Bonzini goto gen_simd; 3445139c1837SPaolo Bonzini case INDEX_op_x86_packss_vec: 3446139c1837SPaolo Bonzini insn = packss_insn[vece]; 3447139c1837SPaolo Bonzini goto gen_simd; 3448139c1837SPaolo Bonzini case INDEX_op_x86_packus_vec: 3449139c1837SPaolo Bonzini insn = packus_insn[vece]; 3450139c1837SPaolo Bonzini goto gen_simd; 3451965d5d06SRichard Henderson case INDEX_op_x86_vpshldv_vec: 3452965d5d06SRichard Henderson insn = vpshldv_insn[vece]; 3453965d5d06SRichard Henderson a1 = a2; 3454965d5d06SRichard Henderson a2 = args[3]; 
3455965d5d06SRichard Henderson goto gen_simd; 3456965d5d06SRichard Henderson case INDEX_op_x86_vpshrdv_vec: 3457965d5d06SRichard Henderson insn = vpshrdv_insn[vece]; 3458965d5d06SRichard Henderson a1 = a2; 3459965d5d06SRichard Henderson a2 = args[3]; 3460965d5d06SRichard Henderson goto gen_simd; 3461139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32 3462139c1837SPaolo Bonzini case INDEX_op_dup2_vec: 3463139c1837SPaolo Bonzini /* First merge the two 32-bit inputs to a single 64-bit element. */ 3464139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2); 3465139c1837SPaolo Bonzini /* Then replicate the 64-bit elements across the rest of the vector. */ 3466139c1837SPaolo Bonzini if (type != TCG_TYPE_V64) { 3467139c1837SPaolo Bonzini tcg_out_dup_vec(s, type, MO_64, a0, a0); 3468139c1837SPaolo Bonzini } 3469139c1837SPaolo Bonzini break; 3470139c1837SPaolo Bonzini#endif 3471139c1837SPaolo Bonzini case INDEX_op_abs_vec: 3472139c1837SPaolo Bonzini insn = abs_insn[vece]; 3473139c1837SPaolo Bonzini a2 = a1; 3474139c1837SPaolo Bonzini a1 = 0; 3475139c1837SPaolo Bonzini goto gen_simd; 3476139c1837SPaolo Bonzini gen_simd: 3477139c1837SPaolo Bonzini tcg_debug_assert(insn != OPC_UD2); 3478bc97b3adSRichard Henderson tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type); 3479139c1837SPaolo Bonzini break; 3480139c1837SPaolo Bonzini 3481139c1837SPaolo Bonzini case INDEX_op_cmp_vec: 3482b8a56703SRichard Henderson tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]); 3483b8a56703SRichard Henderson break; 3484139c1837SPaolo Bonzini 3485db4121d2SRichard Henderson case INDEX_op_cmpsel_vec: 3486db4121d2SRichard Henderson tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2, 3487db4121d2SRichard Henderson args[3], args[4], args[5]); 3488db4121d2SRichard Henderson break; 3489db4121d2SRichard Henderson 3490139c1837SPaolo Bonzini case INDEX_op_andc_vec: 3491139c1837SPaolo Bonzini insn = OPC_PANDN; 3492bc97b3adSRichard Henderson tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type); 
3493139c1837SPaolo Bonzini break; 3494139c1837SPaolo Bonzini 3495139c1837SPaolo Bonzini case INDEX_op_shli_vec: 3496264e4182SRichard Henderson insn = shift_imm_insn[vece]; 3497139c1837SPaolo Bonzini sub = 6; 3498139c1837SPaolo Bonzini goto gen_shift; 3499139c1837SPaolo Bonzini case INDEX_op_shri_vec: 3500264e4182SRichard Henderson insn = shift_imm_insn[vece]; 3501139c1837SPaolo Bonzini sub = 2; 3502139c1837SPaolo Bonzini goto gen_shift; 3503139c1837SPaolo Bonzini case INDEX_op_sari_vec: 3504264e4182SRichard Henderson if (vece == MO_64) { 3505264e4182SRichard Henderson insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX; 3506264e4182SRichard Henderson } else { 3507264e4182SRichard Henderson insn = shift_imm_insn[vece]; 3508264e4182SRichard Henderson } 3509139c1837SPaolo Bonzini sub = 4; 35104e73f842SRichard Henderson goto gen_shift; 35114e73f842SRichard Henderson case INDEX_op_rotli_vec: 35124e73f842SRichard Henderson insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ 35134e73f842SRichard Henderson if (vece == MO_64) { 35144e73f842SRichard Henderson insn |= P_VEXW; 35154e73f842SRichard Henderson } 35164e73f842SRichard Henderson sub = 1; 35174e73f842SRichard Henderson goto gen_shift; 3518139c1837SPaolo Bonzini gen_shift: 3519139c1837SPaolo Bonzini tcg_debug_assert(vece != MO_8); 3520bc97b3adSRichard Henderson tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type); 3521139c1837SPaolo Bonzini tcg_out8(s, a2); 3522139c1837SPaolo Bonzini break; 3523139c1837SPaolo Bonzini 3524139c1837SPaolo Bonzini case INDEX_op_ld_vec: 3525139c1837SPaolo Bonzini tcg_out_ld(s, type, a0, a1, a2); 3526139c1837SPaolo Bonzini break; 3527139c1837SPaolo Bonzini case INDEX_op_st_vec: 3528139c1837SPaolo Bonzini tcg_out_st(s, type, a0, a1, a2); 3529139c1837SPaolo Bonzini break; 3530139c1837SPaolo Bonzini case INDEX_op_dupm_vec: 3531139c1837SPaolo Bonzini tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3532139c1837SPaolo Bonzini break; 3533139c1837SPaolo Bonzini 3534139c1837SPaolo Bonzini case INDEX_op_x86_shufps_vec: 
3535139c1837SPaolo Bonzini insn = OPC_SHUFPS; 3536139c1837SPaolo Bonzini sub = args[3]; 3537139c1837SPaolo Bonzini goto gen_simd_imm8; 3538139c1837SPaolo Bonzini case INDEX_op_x86_blend_vec: 3539139c1837SPaolo Bonzini if (vece == MO_16) { 3540139c1837SPaolo Bonzini insn = OPC_PBLENDW; 3541139c1837SPaolo Bonzini } else if (vece == MO_32) { 3542139c1837SPaolo Bonzini insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS); 3543139c1837SPaolo Bonzini } else { 3544139c1837SPaolo Bonzini g_assert_not_reached(); 3545139c1837SPaolo Bonzini } 3546139c1837SPaolo Bonzini sub = args[3]; 3547139c1837SPaolo Bonzini goto gen_simd_imm8; 3548139c1837SPaolo Bonzini case INDEX_op_x86_vperm2i128_vec: 3549139c1837SPaolo Bonzini insn = OPC_VPERM2I128; 3550139c1837SPaolo Bonzini sub = args[3]; 3551139c1837SPaolo Bonzini goto gen_simd_imm8; 3552965d5d06SRichard Henderson case INDEX_op_x86_vpshldi_vec: 3553965d5d06SRichard Henderson insn = vpshldi_insn[vece]; 3554965d5d06SRichard Henderson sub = args[3]; 3555965d5d06SRichard Henderson goto gen_simd_imm8; 35563143767bSRichard Henderson 35573143767bSRichard Henderson case INDEX_op_not_vec: 35583143767bSRichard Henderson insn = OPC_VPTERNLOGQ; 35593143767bSRichard Henderson a2 = a1; 35603143767bSRichard Henderson sub = 0x33; /* !B */ 35613143767bSRichard Henderson goto gen_simd_imm8; 35623143767bSRichard Henderson case INDEX_op_nor_vec: 35633143767bSRichard Henderson insn = OPC_VPTERNLOGQ; 35643143767bSRichard Henderson sub = 0x11; /* norCB */ 35653143767bSRichard Henderson goto gen_simd_imm8; 35663143767bSRichard Henderson case INDEX_op_nand_vec: 35673143767bSRichard Henderson insn = OPC_VPTERNLOGQ; 35683143767bSRichard Henderson sub = 0x77; /* nandCB */ 35693143767bSRichard Henderson goto gen_simd_imm8; 35703143767bSRichard Henderson case INDEX_op_eqv_vec: 35713143767bSRichard Henderson insn = OPC_VPTERNLOGQ; 35723143767bSRichard Henderson sub = 0x99; /* xnorCB */ 35733143767bSRichard Henderson goto gen_simd_imm8; 35743143767bSRichard Henderson 
case INDEX_op_orc_vec: 35753143767bSRichard Henderson insn = OPC_VPTERNLOGQ; 35763143767bSRichard Henderson sub = 0xdd; /* orB!C */ 35773143767bSRichard Henderson goto gen_simd_imm8; 35783143767bSRichard Henderson 3579cf320769SRichard Henderson case INDEX_op_bitsel_vec: 3580cf320769SRichard Henderson insn = OPC_VPTERNLOGQ; 3581cf320769SRichard Henderson a3 = args[3]; 3582cf320769SRichard Henderson if (a0 == a1) { 3583cf320769SRichard Henderson a1 = a2; 3584cf320769SRichard Henderson a2 = a3; 3585cf320769SRichard Henderson sub = 0xca; /* A?B:C */ 3586cf320769SRichard Henderson } else if (a0 == a2) { 3587cf320769SRichard Henderson a2 = a3; 3588cf320769SRichard Henderson sub = 0xe2; /* B?A:C */ 3589cf320769SRichard Henderson } else { 3590cf320769SRichard Henderson tcg_out_mov(s, type, a0, a3); 3591cf320769SRichard Henderson sub = 0xb8; /* B?C:A */ 3592cf320769SRichard Henderson } 3593cf320769SRichard Henderson goto gen_simd_imm8; 3594cf320769SRichard Henderson 3595139c1837SPaolo Bonzini gen_simd_imm8: 3596965d5d06SRichard Henderson tcg_debug_assert(insn != OPC_UD2); 3597bc97b3adSRichard Henderson tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type); 3598139c1837SPaolo Bonzini tcg_out8(s, sub); 3599139c1837SPaolo Bonzini break; 3600139c1837SPaolo Bonzini 3601139c1837SPaolo Bonzini case INDEX_op_x86_psrldq_vec: 3602139c1837SPaolo Bonzini tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1); 3603139c1837SPaolo Bonzini tcg_out8(s, a2); 3604139c1837SPaolo Bonzini break; 3605139c1837SPaolo Bonzini 3606139c1837SPaolo Bonzini case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3607139c1837SPaolo Bonzini case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. 
*/ 3608139c1837SPaolo Bonzini default: 3609139c1837SPaolo Bonzini g_assert_not_reached(); 3610139c1837SPaolo Bonzini } 3611139c1837SPaolo Bonzini} 3612139c1837SPaolo Bonzini 36134c22e840SRichard Hendersonstatic TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3614139c1837SPaolo Bonzini{ 3615139c1837SPaolo Bonzini switch (op) { 3616139c1837SPaolo Bonzini case INDEX_op_goto_ptr: 36174c22e840SRichard Henderson return C_O0_I1(r); 3618139c1837SPaolo Bonzini 3619139c1837SPaolo Bonzini case INDEX_op_ld8u_i32: 3620139c1837SPaolo Bonzini case INDEX_op_ld8u_i64: 3621139c1837SPaolo Bonzini case INDEX_op_ld8s_i32: 3622139c1837SPaolo Bonzini case INDEX_op_ld8s_i64: 3623139c1837SPaolo Bonzini case INDEX_op_ld16u_i32: 3624139c1837SPaolo Bonzini case INDEX_op_ld16u_i64: 3625139c1837SPaolo Bonzini case INDEX_op_ld16s_i32: 3626139c1837SPaolo Bonzini case INDEX_op_ld16s_i64: 3627139c1837SPaolo Bonzini case INDEX_op_ld_i32: 3628139c1837SPaolo Bonzini case INDEX_op_ld32u_i64: 3629139c1837SPaolo Bonzini case INDEX_op_ld32s_i64: 3630139c1837SPaolo Bonzini case INDEX_op_ld_i64: 36314c22e840SRichard Henderson return C_O1_I1(r, r); 3632139c1837SPaolo Bonzini 3633139c1837SPaolo Bonzini case INDEX_op_st8_i32: 3634139c1837SPaolo Bonzini case INDEX_op_st8_i64: 36354c22e840SRichard Henderson return C_O0_I2(qi, r); 36364c22e840SRichard Henderson 3637139c1837SPaolo Bonzini case INDEX_op_st16_i32: 3638139c1837SPaolo Bonzini case INDEX_op_st16_i64: 3639139c1837SPaolo Bonzini case INDEX_op_st_i32: 3640139c1837SPaolo Bonzini case INDEX_op_st32_i64: 36414c22e840SRichard Henderson return C_O0_I2(ri, r); 36424c22e840SRichard Henderson 3643139c1837SPaolo Bonzini case INDEX_op_st_i64: 36444c22e840SRichard Henderson return C_O0_I2(re, r); 3645139c1837SPaolo Bonzini 3646139c1837SPaolo Bonzini case INDEX_op_add_i32: 3647139c1837SPaolo Bonzini case INDEX_op_add_i64: 36484c22e840SRichard Henderson return C_O1_I2(r, r, re); 36494c22e840SRichard Henderson 3650139c1837SPaolo Bonzini case INDEX_op_sub_i32: 
3651139c1837SPaolo Bonzini case INDEX_op_sub_i64: 3652139c1837SPaolo Bonzini case INDEX_op_mul_i32: 3653139c1837SPaolo Bonzini case INDEX_op_mul_i64: 3654139c1837SPaolo Bonzini case INDEX_op_or_i32: 3655139c1837SPaolo Bonzini case INDEX_op_or_i64: 3656139c1837SPaolo Bonzini case INDEX_op_xor_i32: 3657139c1837SPaolo Bonzini case INDEX_op_xor_i64: 36584c22e840SRichard Henderson return C_O1_I2(r, 0, re); 3659139c1837SPaolo Bonzini 3660139c1837SPaolo Bonzini case INDEX_op_and_i32: 3661139c1837SPaolo Bonzini case INDEX_op_and_i64: 36624c22e840SRichard Henderson return C_O1_I2(r, 0, reZ); 36634c22e840SRichard Henderson 3664139c1837SPaolo Bonzini case INDEX_op_andc_i32: 3665139c1837SPaolo Bonzini case INDEX_op_andc_i64: 36664c22e840SRichard Henderson return C_O1_I2(r, r, rI); 3667139c1837SPaolo Bonzini 3668139c1837SPaolo Bonzini case INDEX_op_shl_i32: 3669139c1837SPaolo Bonzini case INDEX_op_shl_i64: 3670139c1837SPaolo Bonzini case INDEX_op_shr_i32: 3671139c1837SPaolo Bonzini case INDEX_op_shr_i64: 3672139c1837SPaolo Bonzini case INDEX_op_sar_i32: 3673139c1837SPaolo Bonzini case INDEX_op_sar_i64: 36744c22e840SRichard Henderson return have_bmi2 ? 
C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); 36754c22e840SRichard Henderson 3676139c1837SPaolo Bonzini case INDEX_op_rotl_i32: 3677139c1837SPaolo Bonzini case INDEX_op_rotl_i64: 3678139c1837SPaolo Bonzini case INDEX_op_rotr_i32: 3679139c1837SPaolo Bonzini case INDEX_op_rotr_i64: 36804c22e840SRichard Henderson return C_O1_I2(r, 0, ci); 3681139c1837SPaolo Bonzini 3682139c1837SPaolo Bonzini case INDEX_op_brcond_i32: 3683139c1837SPaolo Bonzini case INDEX_op_brcond_i64: 3684d3d1c30cSRichard Henderson return C_O0_I2(r, reT); 3685139c1837SPaolo Bonzini 3686139c1837SPaolo Bonzini case INDEX_op_bswap16_i32: 3687139c1837SPaolo Bonzini case INDEX_op_bswap16_i64: 3688139c1837SPaolo Bonzini case INDEX_op_bswap32_i32: 3689139c1837SPaolo Bonzini case INDEX_op_bswap32_i64: 3690139c1837SPaolo Bonzini case INDEX_op_bswap64_i64: 3691139c1837SPaolo Bonzini case INDEX_op_neg_i32: 3692139c1837SPaolo Bonzini case INDEX_op_neg_i64: 3693139c1837SPaolo Bonzini case INDEX_op_not_i32: 3694139c1837SPaolo Bonzini case INDEX_op_not_i64: 3695139c1837SPaolo Bonzini case INDEX_op_extrh_i64_i32: 36964c22e840SRichard Henderson return C_O1_I1(r, 0); 3697139c1837SPaolo Bonzini 3698139c1837SPaolo Bonzini case INDEX_op_ext8s_i32: 3699139c1837SPaolo Bonzini case INDEX_op_ext8s_i64: 3700139c1837SPaolo Bonzini case INDEX_op_ext8u_i32: 3701139c1837SPaolo Bonzini case INDEX_op_ext8u_i64: 37024c22e840SRichard Henderson return C_O1_I1(r, q); 37034c22e840SRichard Henderson 3704139c1837SPaolo Bonzini case INDEX_op_ext16s_i32: 3705139c1837SPaolo Bonzini case INDEX_op_ext16s_i64: 3706139c1837SPaolo Bonzini case INDEX_op_ext16u_i32: 3707139c1837SPaolo Bonzini case INDEX_op_ext16u_i64: 3708139c1837SPaolo Bonzini case INDEX_op_ext32s_i64: 3709139c1837SPaolo Bonzini case INDEX_op_ext32u_i64: 3710139c1837SPaolo Bonzini case INDEX_op_ext_i32_i64: 3711139c1837SPaolo Bonzini case INDEX_op_extu_i32_i64: 3712139c1837SPaolo Bonzini case INDEX_op_extrl_i64_i32: 3713139c1837SPaolo Bonzini case INDEX_op_extract_i32: 3714139c1837SPaolo 
Bonzini case INDEX_op_extract_i64: 3715139c1837SPaolo Bonzini case INDEX_op_sextract_i32: 3716139c1837SPaolo Bonzini case INDEX_op_ctpop_i32: 3717139c1837SPaolo Bonzini case INDEX_op_ctpop_i64: 37184c22e840SRichard Henderson return C_O1_I1(r, r); 37194c22e840SRichard Henderson 3720139c1837SPaolo Bonzini case INDEX_op_extract2_i32: 3721139c1837SPaolo Bonzini case INDEX_op_extract2_i64: 37224c22e840SRichard Henderson return C_O1_I2(r, 0, r); 3723139c1837SPaolo Bonzini 3724139c1837SPaolo Bonzini case INDEX_op_deposit_i32: 3725139c1837SPaolo Bonzini case INDEX_op_deposit_i64: 372673f97f0aSRichard Henderson return C_O1_I2(q, 0, qi); 37274c22e840SRichard Henderson 3728139c1837SPaolo Bonzini case INDEX_op_setcond_i32: 3729139c1837SPaolo Bonzini case INDEX_op_setcond_i64: 373095bf306eSRichard Henderson case INDEX_op_negsetcond_i32: 373195bf306eSRichard Henderson case INDEX_op_negsetcond_i64: 3732d3d1c30cSRichard Henderson return C_O1_I2(q, r, reT); 37334c22e840SRichard Henderson 3734139c1837SPaolo Bonzini case INDEX_op_movcond_i32: 3735139c1837SPaolo Bonzini case INDEX_op_movcond_i64: 3736d3d1c30cSRichard Henderson return C_O1_I4(r, r, reT, r, 0); 37374c22e840SRichard Henderson 3738139c1837SPaolo Bonzini case INDEX_op_div2_i32: 3739139c1837SPaolo Bonzini case INDEX_op_div2_i64: 3740139c1837SPaolo Bonzini case INDEX_op_divu2_i32: 3741139c1837SPaolo Bonzini case INDEX_op_divu2_i64: 37424c22e840SRichard Henderson return C_O2_I3(a, d, 0, 1, r); 37434c22e840SRichard Henderson 3744139c1837SPaolo Bonzini case INDEX_op_mulu2_i32: 3745139c1837SPaolo Bonzini case INDEX_op_mulu2_i64: 3746139c1837SPaolo Bonzini case INDEX_op_muls2_i32: 3747139c1837SPaolo Bonzini case INDEX_op_muls2_i64: 37484c22e840SRichard Henderson return C_O2_I2(a, d, a, r); 37494c22e840SRichard Henderson 3750139c1837SPaolo Bonzini case INDEX_op_add2_i32: 3751139c1837SPaolo Bonzini case INDEX_op_add2_i64: 3752139c1837SPaolo Bonzini case INDEX_op_sub2_i32: 3753139c1837SPaolo Bonzini case INDEX_op_sub2_i64: 
375422d2e535SIlya Leoshkevich return C_N1_O1_I4(r, r, 0, 1, re, re); 37554c22e840SRichard Henderson 3756139c1837SPaolo Bonzini case INDEX_op_ctz_i32: 3757139c1837SPaolo Bonzini case INDEX_op_ctz_i64: 37584c22e840SRichard Henderson return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); 37594c22e840SRichard Henderson 3760139c1837SPaolo Bonzini case INDEX_op_clz_i32: 3761139c1837SPaolo Bonzini case INDEX_op_clz_i64: 37624c22e840SRichard Henderson return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); 3763139c1837SPaolo Bonzini 3764fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i32: 3765fecccfccSRichard Henderson return C_O1_I1(r, L); 3766fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i32: 3767fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L); 37684c22e840SRichard Henderson 3769fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i32: 3770fecccfccSRichard Henderson return C_O0_I2(L, L); 3771fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i32: 3772fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); 3773fecccfccSRichard Henderson case INDEX_op_qemu_st8_a32_i32: 3774fecccfccSRichard Henderson return C_O0_I2(s, L); 3775fecccfccSRichard Henderson case INDEX_op_qemu_st8_a64_i32: 3776fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L); 37774c22e840SRichard Henderson 3778fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i64: 3779fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); 3780fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i64: 3781fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L); 37824c22e840SRichard Henderson 3783fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i64: 3784fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? 
C_O0_I2(L, L) : C_O0_I3(L, L, L); 3785fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i64: 3786fecccfccSRichard Henderson return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L); 3787139c1837SPaolo Bonzini 3788098d0fc1SRichard Henderson case INDEX_op_qemu_ld_a32_i128: 3789098d0fc1SRichard Henderson case INDEX_op_qemu_ld_a64_i128: 3790098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3791098d0fc1SRichard Henderson return C_O2_I1(r, r, L); 3792098d0fc1SRichard Henderson case INDEX_op_qemu_st_a32_i128: 3793098d0fc1SRichard Henderson case INDEX_op_qemu_st_a64_i128: 3794098d0fc1SRichard Henderson tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3795098d0fc1SRichard Henderson return C_O0_I3(L, L, L); 3796098d0fc1SRichard Henderson 3797139c1837SPaolo Bonzini case INDEX_op_brcond2_i32: 37984c22e840SRichard Henderson return C_O0_I4(r, r, ri, ri); 37994c22e840SRichard Henderson 3800139c1837SPaolo Bonzini case INDEX_op_setcond2_i32: 38014c22e840SRichard Henderson return C_O1_I4(r, r, r, ri, ri); 3802139c1837SPaolo Bonzini 3803139c1837SPaolo Bonzini case INDEX_op_ld_vec: 3804139c1837SPaolo Bonzini case INDEX_op_dupm_vec: 38054c22e840SRichard Henderson return C_O1_I1(x, r); 38064c22e840SRichard Henderson 38074c22e840SRichard Henderson case INDEX_op_st_vec: 38084c22e840SRichard Henderson return C_O0_I2(x, r); 3809139c1837SPaolo Bonzini 3810139c1837SPaolo Bonzini case INDEX_op_add_vec: 3811139c1837SPaolo Bonzini case INDEX_op_sub_vec: 3812139c1837SPaolo Bonzini case INDEX_op_mul_vec: 3813139c1837SPaolo Bonzini case INDEX_op_and_vec: 3814139c1837SPaolo Bonzini case INDEX_op_or_vec: 3815139c1837SPaolo Bonzini case INDEX_op_xor_vec: 3816139c1837SPaolo Bonzini case INDEX_op_andc_vec: 38173143767bSRichard Henderson case INDEX_op_orc_vec: 38183143767bSRichard Henderson case INDEX_op_nand_vec: 38193143767bSRichard Henderson case INDEX_op_nor_vec: 38203143767bSRichard Henderson case INDEX_op_eqv_vec: 3821139c1837SPaolo Bonzini case 
INDEX_op_ssadd_vec: 3822139c1837SPaolo Bonzini case INDEX_op_usadd_vec: 3823139c1837SPaolo Bonzini case INDEX_op_sssub_vec: 3824139c1837SPaolo Bonzini case INDEX_op_ussub_vec: 3825139c1837SPaolo Bonzini case INDEX_op_smin_vec: 3826139c1837SPaolo Bonzini case INDEX_op_umin_vec: 3827139c1837SPaolo Bonzini case INDEX_op_smax_vec: 3828139c1837SPaolo Bonzini case INDEX_op_umax_vec: 3829139c1837SPaolo Bonzini case INDEX_op_shlv_vec: 3830139c1837SPaolo Bonzini case INDEX_op_shrv_vec: 3831139c1837SPaolo Bonzini case INDEX_op_sarv_vec: 3832102cd35cSRichard Henderson case INDEX_op_rotlv_vec: 3833102cd35cSRichard Henderson case INDEX_op_rotrv_vec: 3834139c1837SPaolo Bonzini case INDEX_op_shls_vec: 3835139c1837SPaolo Bonzini case INDEX_op_shrs_vec: 3836139c1837SPaolo Bonzini case INDEX_op_sars_vec: 3837139c1837SPaolo Bonzini case INDEX_op_cmp_vec: 3838139c1837SPaolo Bonzini case INDEX_op_x86_shufps_vec: 3839139c1837SPaolo Bonzini case INDEX_op_x86_blend_vec: 3840139c1837SPaolo Bonzini case INDEX_op_x86_packss_vec: 3841139c1837SPaolo Bonzini case INDEX_op_x86_packus_vec: 3842139c1837SPaolo Bonzini case INDEX_op_x86_vperm2i128_vec: 3843139c1837SPaolo Bonzini case INDEX_op_x86_punpckl_vec: 3844139c1837SPaolo Bonzini case INDEX_op_x86_punpckh_vec: 3845965d5d06SRichard Henderson case INDEX_op_x86_vpshldi_vec: 3846139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32 3847139c1837SPaolo Bonzini case INDEX_op_dup2_vec: 3848139c1837SPaolo Bonzini#endif 38494c22e840SRichard Henderson return C_O1_I2(x, x, x); 38504c22e840SRichard Henderson 3851139c1837SPaolo Bonzini case INDEX_op_abs_vec: 3852139c1837SPaolo Bonzini case INDEX_op_dup_vec: 38533143767bSRichard Henderson case INDEX_op_not_vec: 3854139c1837SPaolo Bonzini case INDEX_op_shli_vec: 3855139c1837SPaolo Bonzini case INDEX_op_shri_vec: 3856139c1837SPaolo Bonzini case INDEX_op_sari_vec: 38574e73f842SRichard Henderson case INDEX_op_rotli_vec: 3858139c1837SPaolo Bonzini case INDEX_op_x86_psrldq_vec: 38594c22e840SRichard Henderson return 
C_O1_I1(x, x); 38604c22e840SRichard Henderson 3861965d5d06SRichard Henderson case INDEX_op_x86_vpshldv_vec: 3862965d5d06SRichard Henderson case INDEX_op_x86_vpshrdv_vec: 3863965d5d06SRichard Henderson return C_O1_I3(x, 0, x, x); 3864965d5d06SRichard Henderson 3865cf320769SRichard Henderson case INDEX_op_bitsel_vec: 38664c22e840SRichard Henderson return C_O1_I3(x, x, x, x); 3867db4121d2SRichard Henderson case INDEX_op_cmpsel_vec: 3868d8387f0eSRichard Henderson return C_O1_I4(x, x, x, xO, x); 3869139c1837SPaolo Bonzini 3870139c1837SPaolo Bonzini default: 38714c22e840SRichard Henderson g_assert_not_reached(); 3872139c1837SPaolo Bonzini } 3873139c1837SPaolo Bonzini} 3874139c1837SPaolo Bonzini 3875139c1837SPaolo Bonziniint tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3876139c1837SPaolo Bonzini{ 3877139c1837SPaolo Bonzini switch (opc) { 3878139c1837SPaolo Bonzini case INDEX_op_add_vec: 3879139c1837SPaolo Bonzini case INDEX_op_sub_vec: 3880139c1837SPaolo Bonzini case INDEX_op_and_vec: 3881139c1837SPaolo Bonzini case INDEX_op_or_vec: 3882139c1837SPaolo Bonzini case INDEX_op_xor_vec: 3883139c1837SPaolo Bonzini case INDEX_op_andc_vec: 38843143767bSRichard Henderson case INDEX_op_orc_vec: 38853143767bSRichard Henderson case INDEX_op_nand_vec: 38863143767bSRichard Henderson case INDEX_op_nor_vec: 38873143767bSRichard Henderson case INDEX_op_eqv_vec: 38883143767bSRichard Henderson case INDEX_op_not_vec: 3889cf320769SRichard Henderson case INDEX_op_bitsel_vec: 3890139c1837SPaolo Bonzini return 1; 3891139c1837SPaolo Bonzini case INDEX_op_cmp_vec: 3892139c1837SPaolo Bonzini case INDEX_op_cmpsel_vec: 3893139c1837SPaolo Bonzini return -1; 3894139c1837SPaolo Bonzini 38954e73f842SRichard Henderson case INDEX_op_rotli_vec: 38964e73f842SRichard Henderson return have_avx512vl && vece >= MO_32 ? 
1 : -1; 38974e73f842SRichard Henderson 3898139c1837SPaolo Bonzini case INDEX_op_shli_vec: 3899139c1837SPaolo Bonzini case INDEX_op_shri_vec: 3900139c1837SPaolo Bonzini /* We must expand the operation for MO_8. */ 3901139c1837SPaolo Bonzini return vece == MO_8 ? -1 : 1; 3902139c1837SPaolo Bonzini 3903139c1837SPaolo Bonzini case INDEX_op_sari_vec: 3904264e4182SRichard Henderson switch (vece) { 3905264e4182SRichard Henderson case MO_8: 3906139c1837SPaolo Bonzini return -1; 3907264e4182SRichard Henderson case MO_16: 3908264e4182SRichard Henderson case MO_32: 3909264e4182SRichard Henderson return 1; 3910264e4182SRichard Henderson case MO_64: 3911264e4182SRichard Henderson if (have_avx512vl) { 3912264e4182SRichard Henderson return 1; 3913139c1837SPaolo Bonzini } 3914264e4182SRichard Henderson /* 3915264e4182SRichard Henderson * We can emulate this for MO_64, but it does not pay off 3916264e4182SRichard Henderson * unless we're producing at least 4 values. 3917264e4182SRichard Henderson */ 3918139c1837SPaolo Bonzini return type >= TCG_TYPE_V256 ? -1 : 0; 3919139c1837SPaolo Bonzini } 3920264e4182SRichard Henderson return 0; 3921139c1837SPaolo Bonzini 3922139c1837SPaolo Bonzini case INDEX_op_shls_vec: 3923139c1837SPaolo Bonzini case INDEX_op_shrs_vec: 3924139c1837SPaolo Bonzini return vece >= MO_16; 3925139c1837SPaolo Bonzini case INDEX_op_sars_vec: 392647b331b2SRichard Henderson switch (vece) { 392747b331b2SRichard Henderson case MO_16: 392847b331b2SRichard Henderson case MO_32: 392947b331b2SRichard Henderson return 1; 393047b331b2SRichard Henderson case MO_64: 393147b331b2SRichard Henderson return have_avx512vl; 393247b331b2SRichard Henderson } 393347b331b2SRichard Henderson return 0; 3934139c1837SPaolo Bonzini case INDEX_op_rotls_vec: 3935139c1837SPaolo Bonzini return vece >= MO_16 ? 
-1 : 0; 3936139c1837SPaolo Bonzini 3937139c1837SPaolo Bonzini case INDEX_op_shlv_vec: 3938139c1837SPaolo Bonzini case INDEX_op_shrv_vec: 3939ef77ce0dSRichard Henderson switch (vece) { 3940ef77ce0dSRichard Henderson case MO_16: 3941ef77ce0dSRichard Henderson return have_avx512bw; 3942ef77ce0dSRichard Henderson case MO_32: 3943ef77ce0dSRichard Henderson case MO_64: 3944ef77ce0dSRichard Henderson return have_avx2; 3945ef77ce0dSRichard Henderson } 3946ef77ce0dSRichard Henderson return 0; 3947139c1837SPaolo Bonzini case INDEX_op_sarv_vec: 3948ef77ce0dSRichard Henderson switch (vece) { 3949ef77ce0dSRichard Henderson case MO_16: 3950ef77ce0dSRichard Henderson return have_avx512bw; 3951ef77ce0dSRichard Henderson case MO_32: 3952ef77ce0dSRichard Henderson return have_avx2; 3953ef77ce0dSRichard Henderson case MO_64: 3954ef77ce0dSRichard Henderson return have_avx512vl; 3955ef77ce0dSRichard Henderson } 3956ef77ce0dSRichard Henderson return 0; 3957139c1837SPaolo Bonzini case INDEX_op_rotlv_vec: 3958139c1837SPaolo Bonzini case INDEX_op_rotrv_vec: 3959102cd35cSRichard Henderson switch (vece) { 3960786c7ef3SRichard Henderson case MO_16: 3961786c7ef3SRichard Henderson return have_avx512vbmi2 ? -1 : 0; 3962102cd35cSRichard Henderson case MO_32: 3963102cd35cSRichard Henderson case MO_64: 3964102cd35cSRichard Henderson return have_avx512vl ? 1 : have_avx2 ? 
-1 : 0; 3965102cd35cSRichard Henderson } 3966102cd35cSRichard Henderson return 0; 3967139c1837SPaolo Bonzini 3968139c1837SPaolo Bonzini case INDEX_op_mul_vec: 39694c8b9686SRichard Henderson switch (vece) { 39704c8b9686SRichard Henderson case MO_8: 3971139c1837SPaolo Bonzini return -1; 39724c8b9686SRichard Henderson case MO_64: 39734c8b9686SRichard Henderson return have_avx512dq; 3974139c1837SPaolo Bonzini } 3975139c1837SPaolo Bonzini return 1; 3976139c1837SPaolo Bonzini 3977139c1837SPaolo Bonzini case INDEX_op_ssadd_vec: 3978139c1837SPaolo Bonzini case INDEX_op_usadd_vec: 3979139c1837SPaolo Bonzini case INDEX_op_sssub_vec: 3980139c1837SPaolo Bonzini case INDEX_op_ussub_vec: 3981139c1837SPaolo Bonzini return vece <= MO_16; 3982139c1837SPaolo Bonzini case INDEX_op_smin_vec: 3983139c1837SPaolo Bonzini case INDEX_op_smax_vec: 3984139c1837SPaolo Bonzini case INDEX_op_umin_vec: 3985139c1837SPaolo Bonzini case INDEX_op_umax_vec: 3986139c1837SPaolo Bonzini case INDEX_op_abs_vec: 3987dac1648fSRichard Henderson return vece <= MO_32 || have_avx512vl; 3988139c1837SPaolo Bonzini 3989139c1837SPaolo Bonzini default: 3990139c1837SPaolo Bonzini return 0; 3991139c1837SPaolo Bonzini } 3992139c1837SPaolo Bonzini} 3993139c1837SPaolo Bonzini 39942623ca6aSRichard Hendersonstatic void expand_vec_shi(TCGType type, unsigned vece, bool right, 3995139c1837SPaolo Bonzini TCGv_vec v0, TCGv_vec v1, TCGArg imm) 3996139c1837SPaolo Bonzini{ 39972623ca6aSRichard Henderson uint8_t mask; 3998139c1837SPaolo Bonzini 3999139c1837SPaolo Bonzini tcg_debug_assert(vece == MO_8); 40002623ca6aSRichard Henderson if (right) { 40012623ca6aSRichard Henderson mask = 0xff >> imm; 40022623ca6aSRichard Henderson tcg_gen_shri_vec(MO_16, v0, v1, imm); 4003139c1837SPaolo Bonzini } else { 40042623ca6aSRichard Henderson mask = 0xff << imm; 40052623ca6aSRichard Henderson tcg_gen_shli_vec(MO_16, v0, v1, imm); 4006139c1837SPaolo Bonzini } 40072623ca6aSRichard Henderson tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, 
MO_8, mask)); 4008139c1837SPaolo Bonzini} 4009139c1837SPaolo Bonzini 4010139c1837SPaolo Bonzinistatic void expand_vec_sari(TCGType type, unsigned vece, 4011139c1837SPaolo Bonzini TCGv_vec v0, TCGv_vec v1, TCGArg imm) 4012139c1837SPaolo Bonzini{ 4013139c1837SPaolo Bonzini TCGv_vec t1, t2; 4014139c1837SPaolo Bonzini 4015139c1837SPaolo Bonzini switch (vece) { 4016139c1837SPaolo Bonzini case MO_8: 40172623ca6aSRichard Henderson /* Unpack to 16-bit, shift, and repack. */ 4018139c1837SPaolo Bonzini t1 = tcg_temp_new_vec(type); 4019139c1837SPaolo Bonzini t2 = tcg_temp_new_vec(type); 4020139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, 4021139c1837SPaolo Bonzini tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); 4022139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, 4023139c1837SPaolo Bonzini tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); 4024139c1837SPaolo Bonzini tcg_gen_sari_vec(MO_16, t1, t1, imm + 8); 4025139c1837SPaolo Bonzini tcg_gen_sari_vec(MO_16, t2, t2, imm + 8); 4026139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8, 4027139c1837SPaolo Bonzini tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 4028139c1837SPaolo Bonzini tcg_temp_free_vec(t1); 4029139c1837SPaolo Bonzini tcg_temp_free_vec(t2); 4030139c1837SPaolo Bonzini break; 4031139c1837SPaolo Bonzini 4032139c1837SPaolo Bonzini case MO_64: 4033f6ff9c2fSRichard Henderson t1 = tcg_temp_new_vec(type); 4034139c1837SPaolo Bonzini if (imm <= 32) { 4035139c1837SPaolo Bonzini /* 4036139c1837SPaolo Bonzini * We can emulate a small sign extend by performing an arithmetic 4037139c1837SPaolo Bonzini * 32-bit shift and overwriting the high half of a 64-bit logical 4038139c1837SPaolo Bonzini * shift. Note that the ISA says shift of 32 is valid, but TCG 4039139c1837SPaolo Bonzini * does not, so we have to bound the smaller shift -- we get the 4040139c1837SPaolo Bonzini * same result in the high half either way. 
4041139c1837SPaolo Bonzini */ 4042139c1837SPaolo Bonzini tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31)); 4043139c1837SPaolo Bonzini tcg_gen_shri_vec(MO_64, v0, v1, imm); 4044139c1837SPaolo Bonzini vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32, 4045139c1837SPaolo Bonzini tcgv_vec_arg(v0), tcgv_vec_arg(v0), 4046139c1837SPaolo Bonzini tcgv_vec_arg(t1), 0xaa); 4047139c1837SPaolo Bonzini } else { 4048139c1837SPaolo Bonzini /* Otherwise we will need to use a compare vs 0 to produce 4049139c1837SPaolo Bonzini * the sign-extend, shift and merge. 4050139c1837SPaolo Bonzini */ 4051f6ff9c2fSRichard Henderson tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1, 4052f6ff9c2fSRichard Henderson tcg_constant_vec(type, MO_64, 0), v1); 4053139c1837SPaolo Bonzini tcg_gen_shri_vec(MO_64, v0, v1, imm); 4054139c1837SPaolo Bonzini tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm); 4055139c1837SPaolo Bonzini tcg_gen_or_vec(MO_64, v0, v0, t1); 4056139c1837SPaolo Bonzini } 4057f6ff9c2fSRichard Henderson tcg_temp_free_vec(t1); 4058139c1837SPaolo Bonzini break; 4059139c1837SPaolo Bonzini 4060139c1837SPaolo Bonzini default: 4061139c1837SPaolo Bonzini g_assert_not_reached(); 4062139c1837SPaolo Bonzini } 4063139c1837SPaolo Bonzini} 4064139c1837SPaolo Bonzini 4065139c1837SPaolo Bonzinistatic void expand_vec_rotli(TCGType type, unsigned vece, 4066139c1837SPaolo Bonzini TCGv_vec v0, TCGv_vec v1, TCGArg imm) 4067139c1837SPaolo Bonzini{ 4068139c1837SPaolo Bonzini TCGv_vec t; 4069139c1837SPaolo Bonzini 40702623ca6aSRichard Henderson if (vece != MO_8 && have_avx512vbmi2) { 4071786c7ef3SRichard Henderson vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, 4072786c7ef3SRichard Henderson tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); 4073786c7ef3SRichard Henderson return; 4074786c7ef3SRichard Henderson } 4075786c7ef3SRichard Henderson 4076139c1837SPaolo Bonzini t = tcg_temp_new_vec(type); 4077139c1837SPaolo Bonzini tcg_gen_shli_vec(vece, t, v1, imm); 4078139c1837SPaolo Bonzini tcg_gen_shri_vec(vece, v0, v1, (8 << 
vece) - imm); 4079139c1837SPaolo Bonzini tcg_gen_or_vec(vece, v0, v0, t); 4080139c1837SPaolo Bonzini tcg_temp_free_vec(t); 4081139c1837SPaolo Bonzini} 4082139c1837SPaolo Bonzini 4083139c1837SPaolo Bonzinistatic void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, 4084139c1837SPaolo Bonzini TCGv_vec v1, TCGv_vec sh, bool right) 4085139c1837SPaolo Bonzini{ 4086786c7ef3SRichard Henderson TCGv_vec t; 4087139c1837SPaolo Bonzini 4088786c7ef3SRichard Henderson if (have_avx512vbmi2) { 4089786c7ef3SRichard Henderson vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec, 4090786c7ef3SRichard Henderson type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), 4091786c7ef3SRichard Henderson tcgv_vec_arg(v1), tcgv_vec_arg(sh)); 4092786c7ef3SRichard Henderson return; 4093786c7ef3SRichard Henderson } 4094786c7ef3SRichard Henderson 4095786c7ef3SRichard Henderson t = tcg_temp_new_vec(type); 4096139c1837SPaolo Bonzini tcg_gen_dupi_vec(vece, t, 8 << vece); 4097139c1837SPaolo Bonzini tcg_gen_sub_vec(vece, t, t, sh); 4098139c1837SPaolo Bonzini if (right) { 4099139c1837SPaolo Bonzini tcg_gen_shlv_vec(vece, t, v1, t); 4100139c1837SPaolo Bonzini tcg_gen_shrv_vec(vece, v0, v1, sh); 4101139c1837SPaolo Bonzini } else { 4102139c1837SPaolo Bonzini tcg_gen_shrv_vec(vece, t, v1, t); 4103139c1837SPaolo Bonzini tcg_gen_shlv_vec(vece, v0, v1, sh); 4104139c1837SPaolo Bonzini } 4105139c1837SPaolo Bonzini tcg_gen_or_vec(vece, v0, v0, t); 4106139c1837SPaolo Bonzini tcg_temp_free_vec(t); 4107139c1837SPaolo Bonzini} 4108139c1837SPaolo Bonzini 41091d442e42SRichard Hendersonstatic void expand_vec_rotls(TCGType type, unsigned vece, 41101d442e42SRichard Henderson TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) 41111d442e42SRichard Henderson{ 41121d442e42SRichard Henderson TCGv_vec t = tcg_temp_new_vec(type); 41131d442e42SRichard Henderson 41141d442e42SRichard Henderson tcg_debug_assert(vece != MO_8); 41151d442e42SRichard Henderson 41161d442e42SRichard Henderson if (vece >= MO_32 ? 
have_avx512vl : have_avx512vbmi2) { 41171d442e42SRichard Henderson tcg_gen_dup_i32_vec(vece, t, lsh); 41181d442e42SRichard Henderson if (vece >= MO_32) { 41191d442e42SRichard Henderson tcg_gen_rotlv_vec(vece, v0, v1, t); 41201d442e42SRichard Henderson } else { 41211d442e42SRichard Henderson expand_vec_rotv(type, vece, v0, v1, t, false); 41221d442e42SRichard Henderson } 41231d442e42SRichard Henderson } else { 41241d442e42SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 41251d442e42SRichard Henderson 41261d442e42SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 41271d442e42SRichard Henderson tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); 41281d442e42SRichard Henderson tcg_gen_shls_vec(vece, t, v1, lsh); 41291d442e42SRichard Henderson tcg_gen_shrs_vec(vece, v0, v1, rsh); 41301d442e42SRichard Henderson tcg_gen_or_vec(vece, v0, v0, t); 41311d442e42SRichard Henderson 41321d442e42SRichard Henderson tcg_temp_free_i32(rsh); 41331d442e42SRichard Henderson } 41341d442e42SRichard Henderson 41351d442e42SRichard Henderson tcg_temp_free_vec(t); 41361d442e42SRichard Henderson} 41371d442e42SRichard Henderson 4138139c1837SPaolo Bonzinistatic void expand_vec_mul(TCGType type, unsigned vece, 4139139c1837SPaolo Bonzini TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) 4140139c1837SPaolo Bonzini{ 41419739a052SRichard Henderson TCGv_vec t1, t2, t3, t4, zero; 4142139c1837SPaolo Bonzini 4143139c1837SPaolo Bonzini tcg_debug_assert(vece == MO_8); 4144139c1837SPaolo Bonzini 4145139c1837SPaolo Bonzini /* 4146139c1837SPaolo Bonzini * Unpack v1 bytes to words, 0 | x. 4147139c1837SPaolo Bonzini * Unpack v2 bytes to words, y | 0. 4148139c1837SPaolo Bonzini * This leaves the 8-bit result, x * y, with 8 bits of right padding. 4149139c1837SPaolo Bonzini * Shift logical right by 8 bits to clear the high 8 bytes before 4150139c1837SPaolo Bonzini * using an unsigned saturated pack. 
4151139c1837SPaolo Bonzini * 4152139c1837SPaolo Bonzini * The difference between the V64, V128 and V256 cases is merely how 4153139c1837SPaolo Bonzini * we distribute the expansion between temporaries. 4154139c1837SPaolo Bonzini */ 4155139c1837SPaolo Bonzini switch (type) { 4156139c1837SPaolo Bonzini case TCG_TYPE_V64: 4157139c1837SPaolo Bonzini t1 = tcg_temp_new_vec(TCG_TYPE_V128); 4158139c1837SPaolo Bonzini t2 = tcg_temp_new_vec(TCG_TYPE_V128); 41599739a052SRichard Henderson zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0); 4160139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8, 41619739a052SRichard Henderson tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); 4162139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8, 41639739a052SRichard Henderson tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); 4164139c1837SPaolo Bonzini tcg_gen_mul_vec(MO_16, t1, t1, t2); 4165139c1837SPaolo Bonzini tcg_gen_shri_vec(MO_16, t1, t1, 8); 4166139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8, 4167139c1837SPaolo Bonzini tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t1)); 4168139c1837SPaolo Bonzini tcg_temp_free_vec(t1); 4169139c1837SPaolo Bonzini tcg_temp_free_vec(t2); 4170139c1837SPaolo Bonzini break; 4171139c1837SPaolo Bonzini 4172139c1837SPaolo Bonzini case TCG_TYPE_V128: 4173139c1837SPaolo Bonzini case TCG_TYPE_V256: 4174139c1837SPaolo Bonzini t1 = tcg_temp_new_vec(type); 4175139c1837SPaolo Bonzini t2 = tcg_temp_new_vec(type); 4176139c1837SPaolo Bonzini t3 = tcg_temp_new_vec(type); 4177139c1837SPaolo Bonzini t4 = tcg_temp_new_vec(type); 41789739a052SRichard Henderson zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0); 4179139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, 41809739a052SRichard Henderson tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); 4181139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, 41829739a052SRichard Henderson 
tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); 4183139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, 41849739a052SRichard Henderson tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); 4185139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, 41869739a052SRichard Henderson tcgv_vec_arg(t4), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); 4187139c1837SPaolo Bonzini tcg_gen_mul_vec(MO_16, t1, t1, t2); 4188139c1837SPaolo Bonzini tcg_gen_mul_vec(MO_16, t3, t3, t4); 4189139c1837SPaolo Bonzini tcg_gen_shri_vec(MO_16, t1, t1, 8); 4190139c1837SPaolo Bonzini tcg_gen_shri_vec(MO_16, t3, t3, 8); 4191139c1837SPaolo Bonzini vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8, 4192139c1837SPaolo Bonzini tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); 4193139c1837SPaolo Bonzini tcg_temp_free_vec(t1); 4194139c1837SPaolo Bonzini tcg_temp_free_vec(t2); 4195139c1837SPaolo Bonzini tcg_temp_free_vec(t3); 4196139c1837SPaolo Bonzini tcg_temp_free_vec(t4); 4197139c1837SPaolo Bonzini break; 4198139c1837SPaolo Bonzini 4199139c1837SPaolo Bonzini default: 4200139c1837SPaolo Bonzini g_assert_not_reached(); 4201139c1837SPaolo Bonzini } 4202139c1837SPaolo Bonzini} 4203139c1837SPaolo Bonzini 4204db4121d2SRichard Hendersonstatic TCGCond expand_vec_cond(TCGType type, unsigned vece, 4205db4121d2SRichard Henderson TCGArg *a1, TCGArg *a2, TCGCond cond) 4206139c1837SPaolo Bonzini{ 4207b8a56703SRichard Henderson /* 4208b8a56703SRichard Henderson * Without AVX512, there are no 64-bit unsigned comparisons. 4209b8a56703SRichard Henderson * We must bias the inputs so that they become signed. 4210b8a56703SRichard Henderson * All other swapping and inversion are handled during code generation. 
4211b8a56703SRichard Henderson */ 4212717da87dSRichard Henderson if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) { 4213db4121d2SRichard Henderson TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1)); 4214db4121d2SRichard Henderson TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2)); 4215b8a56703SRichard Henderson TCGv_vec t1 = tcg_temp_new_vec(type); 4216b8a56703SRichard Henderson TCGv_vec t2 = tcg_temp_new_vec(type); 4217b8a56703SRichard Henderson TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1)); 4218139c1837SPaolo Bonzini 42199739a052SRichard Henderson tcg_gen_sub_vec(vece, t1, v1, t3); 42209739a052SRichard Henderson tcg_gen_sub_vec(vece, t2, v2, t3); 4221db4121d2SRichard Henderson *a1 = tcgv_vec_arg(t1); 4222db4121d2SRichard Henderson *a2 = tcgv_vec_arg(t2); 4223139c1837SPaolo Bonzini cond = tcg_signed_cond(cond); 4224139c1837SPaolo Bonzini } 4225db4121d2SRichard Henderson return cond; 4226139c1837SPaolo Bonzini} 4227139c1837SPaolo Bonzini 4228db4121d2SRichard Hendersonstatic void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0, 4229db4121d2SRichard Henderson TCGArg a1, TCGArg a2, TCGCond cond) 4230139c1837SPaolo Bonzini{ 4231db4121d2SRichard Henderson cond = expand_vec_cond(type, vece, &a1, &a2, cond); 4232db4121d2SRichard Henderson /* Expand directly; do not recurse. */ 4233db4121d2SRichard Henderson vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond); 4234db4121d2SRichard Henderson} 4235139c1837SPaolo Bonzini 4236db4121d2SRichard Hendersonstatic void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0, 4237db4121d2SRichard Henderson TCGArg a1, TCGArg a2, 4238db4121d2SRichard Henderson TCGArg a3, TCGArg a4, TCGCond cond) 4239db4121d2SRichard Henderson{ 4240db4121d2SRichard Henderson cond = expand_vec_cond(type, vece, &a1, &a2, cond); 4241db4121d2SRichard Henderson /* Expand directly; do not recurse. 
*/ 4242db4121d2SRichard Henderson vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond); 4243139c1837SPaolo Bonzini} 4244139c1837SPaolo Bonzini 4245139c1837SPaolo Bonzinivoid tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 4246139c1837SPaolo Bonzini TCGArg a0, ...) 4247139c1837SPaolo Bonzini{ 4248139c1837SPaolo Bonzini va_list va; 4249db4121d2SRichard Henderson TCGArg a1, a2, a3, a4, a5; 4250db4121d2SRichard Henderson TCGv_vec v0, v1, v2; 4251139c1837SPaolo Bonzini 4252139c1837SPaolo Bonzini va_start(va, a0); 4253db4121d2SRichard Henderson a1 = va_arg(va, TCGArg); 4254139c1837SPaolo Bonzini a2 = va_arg(va, TCGArg); 4255db4121d2SRichard Henderson v0 = temp_tcgv_vec(arg_temp(a0)); 4256db4121d2SRichard Henderson v1 = temp_tcgv_vec(arg_temp(a1)); 4257139c1837SPaolo Bonzini 4258139c1837SPaolo Bonzini switch (opc) { 4259139c1837SPaolo Bonzini case INDEX_op_shli_vec: 42602623ca6aSRichard Henderson expand_vec_shi(type, vece, false, v0, v1, a2); 4261139c1837SPaolo Bonzini break; 42622623ca6aSRichard Henderson case INDEX_op_shri_vec: 42632623ca6aSRichard Henderson expand_vec_shi(type, vece, true, v0, v1, a2); 42642623ca6aSRichard Henderson break; 4265139c1837SPaolo Bonzini case INDEX_op_sari_vec: 4266139c1837SPaolo Bonzini expand_vec_sari(type, vece, v0, v1, a2); 4267139c1837SPaolo Bonzini break; 4268139c1837SPaolo Bonzini 4269139c1837SPaolo Bonzini case INDEX_op_rotli_vec: 4270139c1837SPaolo Bonzini expand_vec_rotli(type, vece, v0, v1, a2); 4271139c1837SPaolo Bonzini break; 4272139c1837SPaolo Bonzini 4273139c1837SPaolo Bonzini case INDEX_op_rotls_vec: 4274139c1837SPaolo Bonzini expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2))); 4275139c1837SPaolo Bonzini break; 4276139c1837SPaolo Bonzini 4277139c1837SPaolo Bonzini case INDEX_op_rotlv_vec: 4278139c1837SPaolo Bonzini v2 = temp_tcgv_vec(arg_temp(a2)); 4279139c1837SPaolo Bonzini expand_vec_rotv(type, vece, v0, v1, v2, false); 4280139c1837SPaolo Bonzini break; 4281139c1837SPaolo 
Bonzini case INDEX_op_rotrv_vec: 4282139c1837SPaolo Bonzini v2 = temp_tcgv_vec(arg_temp(a2)); 4283139c1837SPaolo Bonzini expand_vec_rotv(type, vece, v0, v1, v2, true); 4284139c1837SPaolo Bonzini break; 4285139c1837SPaolo Bonzini 4286139c1837SPaolo Bonzini case INDEX_op_mul_vec: 4287139c1837SPaolo Bonzini v2 = temp_tcgv_vec(arg_temp(a2)); 4288139c1837SPaolo Bonzini expand_vec_mul(type, vece, v0, v1, v2); 4289139c1837SPaolo Bonzini break; 4290139c1837SPaolo Bonzini 4291139c1837SPaolo Bonzini case INDEX_op_cmp_vec: 4292db4121d2SRichard Henderson a3 = va_arg(va, TCGArg); 4293db4121d2SRichard Henderson expand_vec_cmp(type, vece, a0, a1, a2, a3); 4294139c1837SPaolo Bonzini break; 4295139c1837SPaolo Bonzini 4296139c1837SPaolo Bonzini case INDEX_op_cmpsel_vec: 4297db4121d2SRichard Henderson a3 = va_arg(va, TCGArg); 4298db4121d2SRichard Henderson a4 = va_arg(va, TCGArg); 4299db4121d2SRichard Henderson a5 = va_arg(va, TCGArg); 4300db4121d2SRichard Henderson expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5); 4301139c1837SPaolo Bonzini break; 4302139c1837SPaolo Bonzini 4303139c1837SPaolo Bonzini default: 4304139c1837SPaolo Bonzini break; 4305139c1837SPaolo Bonzini } 4306139c1837SPaolo Bonzini 4307139c1837SPaolo Bonzini va_end(va); 4308139c1837SPaolo Bonzini} 4309139c1837SPaolo Bonzini 4310139c1837SPaolo Bonzinistatic const int tcg_target_callee_save_regs[] = { 4311139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64 4312139c1837SPaolo Bonzini TCG_REG_RBP, 4313139c1837SPaolo Bonzini TCG_REG_RBX, 4314139c1837SPaolo Bonzini#if defined(_WIN64) 4315139c1837SPaolo Bonzini TCG_REG_RDI, 4316139c1837SPaolo Bonzini TCG_REG_RSI, 4317139c1837SPaolo Bonzini#endif 4318139c1837SPaolo Bonzini TCG_REG_R12, 4319139c1837SPaolo Bonzini TCG_REG_R13, 4320139c1837SPaolo Bonzini TCG_REG_R14, /* Currently used for the global env. */ 4321139c1837SPaolo Bonzini TCG_REG_R15, 4322139c1837SPaolo Bonzini#else 4323139c1837SPaolo Bonzini TCG_REG_EBP, /* Currently used for the global env. 
*/ 4324139c1837SPaolo Bonzini TCG_REG_EBX, 4325139c1837SPaolo Bonzini TCG_REG_ESI, 4326139c1837SPaolo Bonzini TCG_REG_EDI, 4327139c1837SPaolo Bonzini#endif 4328139c1837SPaolo Bonzini}; 4329139c1837SPaolo Bonzini 4330139c1837SPaolo Bonzini/* Compute frame size via macros, to share between tcg_target_qemu_prologue 4331139c1837SPaolo Bonzini and tcg_register_jit. */ 4332139c1837SPaolo Bonzini 4333139c1837SPaolo Bonzini#define PUSH_SIZE \ 4334139c1837SPaolo Bonzini ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ 4335139c1837SPaolo Bonzini * (TCG_TARGET_REG_BITS / 8)) 4336139c1837SPaolo Bonzini 4337139c1837SPaolo Bonzini#define FRAME_SIZE \ 4338139c1837SPaolo Bonzini ((PUSH_SIZE \ 4339139c1837SPaolo Bonzini + TCG_STATIC_CALL_ARGS_SIZE \ 4340139c1837SPaolo Bonzini + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 4341139c1837SPaolo Bonzini + TCG_TARGET_STACK_ALIGN - 1) \ 4342139c1837SPaolo Bonzini & ~(TCG_TARGET_STACK_ALIGN - 1)) 4343139c1837SPaolo Bonzini 4344139c1837SPaolo Bonzini/* Generate global QEMU prologue and epilogue code */ 4345139c1837SPaolo Bonzinistatic void tcg_target_qemu_prologue(TCGContext *s) 4346139c1837SPaolo Bonzini{ 4347139c1837SPaolo Bonzini int i, stack_addend; 4348139c1837SPaolo Bonzini 4349139c1837SPaolo Bonzini /* TB prologue */ 4350139c1837SPaolo Bonzini 4351139c1837SPaolo Bonzini /* Reserve some stack space, also for TCG temps. */ 4352139c1837SPaolo Bonzini stack_addend = FRAME_SIZE - PUSH_SIZE; 4353139c1837SPaolo Bonzini tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, 4354139c1837SPaolo Bonzini CPU_TEMP_BUF_NLONGS * sizeof(long)); 4355139c1837SPaolo Bonzini 4356139c1837SPaolo Bonzini /* Save all callee saved registers. 
*/ 4357139c1837SPaolo Bonzini for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 4358139c1837SPaolo Bonzini tcg_out_push(s, tcg_target_callee_save_regs[i]); 4359139c1837SPaolo Bonzini } 4360139c1837SPaolo Bonzini 4361915e1d52SRichard Henderson if (!tcg_use_softmmu && guest_base) { 4362915e1d52SRichard Henderson int seg = setup_guest_base_seg(); 4363915e1d52SRichard Henderson if (seg != 0) { 4364915e1d52SRichard Henderson x86_guest_base.seg = seg; 4365915e1d52SRichard Henderson } else if (guest_base == (int32_t)guest_base) { 4366915e1d52SRichard Henderson x86_guest_base.ofs = guest_base; 4367915e1d52SRichard Henderson } else { 4368915e1d52SRichard Henderson assert(TCG_TARGET_REG_BITS == 64); 4369915e1d52SRichard Henderson /* Choose R12 because, as a base, it requires a SIB byte. */ 4370915e1d52SRichard Henderson x86_guest_base.index = TCG_REG_R12; 4371915e1d52SRichard Henderson tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base); 4372915e1d52SRichard Henderson tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index); 4373915e1d52SRichard Henderson } 4374915e1d52SRichard Henderson } 4375915e1d52SRichard Henderson 4376915e1d52SRichard Henderson if (TCG_TARGET_REG_BITS == 32) { 4377139c1837SPaolo Bonzini tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 4378139c1837SPaolo Bonzini (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); 4379139c1837SPaolo Bonzini tcg_out_addi(s, TCG_REG_ESP, -stack_addend); 4380139c1837SPaolo Bonzini /* jmp *tb. */ 4381139c1837SPaolo Bonzini tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, 4382139c1837SPaolo Bonzini (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 4383139c1837SPaolo Bonzini + stack_addend); 4384139c1837SPaolo Bonzini } else { 4385139c1837SPaolo Bonzini tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 4386139c1837SPaolo Bonzini tcg_out_addi(s, TCG_REG_ESP, -stack_addend); 4387139c1837SPaolo Bonzini /* jmp *tb. 
*/ 4388139c1837SPaolo Bonzini tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); 4389915e1d52SRichard Henderson } 4390139c1837SPaolo Bonzini 4391139c1837SPaolo Bonzini /* 4392139c1837SPaolo Bonzini * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 4393139c1837SPaolo Bonzini * and fall through to the rest of the epilogue. 4394139c1837SPaolo Bonzini */ 4395c8bc1168SRichard Henderson tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 4396139c1837SPaolo Bonzini tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0); 4397139c1837SPaolo Bonzini 4398139c1837SPaolo Bonzini /* TB epilogue */ 4399705ed477SRichard Henderson tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 4400139c1837SPaolo Bonzini 4401139c1837SPaolo Bonzini tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); 4402139c1837SPaolo Bonzini 4403139c1837SPaolo Bonzini if (have_avx2) { 4404139c1837SPaolo Bonzini tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0); 4405139c1837SPaolo Bonzini } 4406139c1837SPaolo Bonzini for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { 4407139c1837SPaolo Bonzini tcg_out_pop(s, tcg_target_callee_save_regs[i]); 4408139c1837SPaolo Bonzini } 4409139c1837SPaolo Bonzini tcg_out_opc(s, OPC_RET, 0, 0, 0); 4410139c1837SPaolo Bonzini} 4411139c1837SPaolo Bonzini 44129358fbbfSRichard Hendersonstatic void tcg_out_tb_start(TCGContext *s) 44139358fbbfSRichard Henderson{ 44149358fbbfSRichard Henderson /* nothing to do */ 44159358fbbfSRichard Henderson} 44169358fbbfSRichard Henderson 4417139c1837SPaolo Bonzinistatic void tcg_out_nop_fill(tcg_insn_unit *p, int count) 4418139c1837SPaolo Bonzini{ 4419139c1837SPaolo Bonzini memset(p, 0x90, count); 4420139c1837SPaolo Bonzini} 4421139c1837SPaolo Bonzini 4422139c1837SPaolo Bonzinistatic void tcg_target_init(TCGContext *s) 4423139c1837SPaolo Bonzini{ 4424139c1837SPaolo Bonzini tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; 4425139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 4426139c1837SPaolo 
Bonzini tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; 4427139c1837SPaolo Bonzini } 4428139c1837SPaolo Bonzini if (have_avx1) { 4429139c1837SPaolo Bonzini tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS; 4430139c1837SPaolo Bonzini tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS; 4431139c1837SPaolo Bonzini } 4432139c1837SPaolo Bonzini if (have_avx2) { 4433139c1837SPaolo Bonzini tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS; 4434139c1837SPaolo Bonzini } 4435139c1837SPaolo Bonzini 4436139c1837SPaolo Bonzini tcg_target_call_clobber_regs = ALL_VECTOR_REGS; 4437139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); 4438139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); 4439139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); 4440139c1837SPaolo Bonzini if (TCG_TARGET_REG_BITS == 64) { 4441139c1837SPaolo Bonzini#if !defined(_WIN64) 4442139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); 4443139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); 4444139c1837SPaolo Bonzini#endif 4445139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 4446139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 4447139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 4448139c1837SPaolo Bonzini tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 4449139c1837SPaolo Bonzini } 4450139c1837SPaolo Bonzini 4451139c1837SPaolo Bonzini s->reserved_regs = 0; 4452139c1837SPaolo Bonzini tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); 4453098d0fc1SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC); 44546b258e74SRichard Henderson#ifdef _WIN64 44556b258e74SRichard Henderson /* These are call saved, and we don't save them, so don't use them. 
*/ 44566b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6); 44576b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM7); 44586b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM8); 44596b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM9); 44606b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM10); 44616b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM11); 44626b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM12); 44636b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM13); 44646b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM14); 44656b258e74SRichard Henderson tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM15); 44666b258e74SRichard Henderson#endif 4467139c1837SPaolo Bonzini} 4468139c1837SPaolo Bonzini 4469139c1837SPaolo Bonzinitypedef struct { 4470139c1837SPaolo Bonzini DebugFrameHeader h; 4471139c1837SPaolo Bonzini uint8_t fde_def_cfa[4]; 4472139c1837SPaolo Bonzini uint8_t fde_reg_ofs[14]; 4473139c1837SPaolo Bonzini} DebugFrame; 4474139c1837SPaolo Bonzini 4475139c1837SPaolo Bonzini/* We're expecting a 2 byte uleb128 encoded value. */ 4476139c1837SPaolo BonziniQEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 4477139c1837SPaolo Bonzini 4478139c1837SPaolo Bonzini#if !defined(__ELF__) 4479139c1837SPaolo Bonzini /* Host machine without ELF. 
*/ 4480139c1837SPaolo Bonzini#elif TCG_TARGET_REG_BITS == 64 4481139c1837SPaolo Bonzini#define ELF_HOST_MACHINE EM_X86_64 4482139c1837SPaolo Bonzinistatic const DebugFrame debug_frame = { 4483139c1837SPaolo Bonzini .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 4484139c1837SPaolo Bonzini .h.cie.id = -1, 4485139c1837SPaolo Bonzini .h.cie.version = 1, 4486139c1837SPaolo Bonzini .h.cie.code_align = 1, 4487139c1837SPaolo Bonzini .h.cie.data_align = 0x78, /* sleb128 -8 */ 4488139c1837SPaolo Bonzini .h.cie.return_column = 16, 4489139c1837SPaolo Bonzini 4490139c1837SPaolo Bonzini /* Total FDE size does not include the "len" member. */ 4491139c1837SPaolo Bonzini .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 4492139c1837SPaolo Bonzini 4493139c1837SPaolo Bonzini .fde_def_cfa = { 4494139c1837SPaolo Bonzini 12, 7, /* DW_CFA_def_cfa %rsp, ... */ 4495139c1837SPaolo Bonzini (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4496139c1837SPaolo Bonzini (FRAME_SIZE >> 7) 4497139c1837SPaolo Bonzini }, 4498139c1837SPaolo Bonzini .fde_reg_ofs = { 4499139c1837SPaolo Bonzini 0x90, 1, /* DW_CFA_offset, %rip, -8 */ 4500139c1837SPaolo Bonzini /* The following ordering must match tcg_target_callee_save_regs. 
*/ 4501139c1837SPaolo Bonzini 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ 4502139c1837SPaolo Bonzini 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ 4503139c1837SPaolo Bonzini 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ 4504139c1837SPaolo Bonzini 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ 4505139c1837SPaolo Bonzini 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ 4506139c1837SPaolo Bonzini 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ 4507139c1837SPaolo Bonzini } 4508139c1837SPaolo Bonzini}; 4509139c1837SPaolo Bonzini#else 4510139c1837SPaolo Bonzini#define ELF_HOST_MACHINE EM_386 4511139c1837SPaolo Bonzinistatic const DebugFrame debug_frame = { 4512139c1837SPaolo Bonzini .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 4513139c1837SPaolo Bonzini .h.cie.id = -1, 4514139c1837SPaolo Bonzini .h.cie.version = 1, 4515139c1837SPaolo Bonzini .h.cie.code_align = 1, 4516139c1837SPaolo Bonzini .h.cie.data_align = 0x7c, /* sleb128 -4 */ 4517139c1837SPaolo Bonzini .h.cie.return_column = 8, 4518139c1837SPaolo Bonzini 4519139c1837SPaolo Bonzini /* Total FDE size does not include the "len" member. */ 4520139c1837SPaolo Bonzini .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 4521139c1837SPaolo Bonzini 4522139c1837SPaolo Bonzini .fde_def_cfa = { 4523139c1837SPaolo Bonzini 12, 4, /* DW_CFA_def_cfa %esp, ... */ 4524139c1837SPaolo Bonzini (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4525139c1837SPaolo Bonzini (FRAME_SIZE >> 7) 4526139c1837SPaolo Bonzini }, 4527139c1837SPaolo Bonzini .fde_reg_ofs = { 4528139c1837SPaolo Bonzini 0x88, 1, /* DW_CFA_offset, %eip, -4 */ 4529139c1837SPaolo Bonzini /* The following ordering must match tcg_target_callee_save_regs. 
*/ 4530139c1837SPaolo Bonzini 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ 4531139c1837SPaolo Bonzini 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ 4532139c1837SPaolo Bonzini 0x86, 4, /* DW_CFA_offset, %esi, -16 */ 4533139c1837SPaolo Bonzini 0x87, 5, /* DW_CFA_offset, %edi, -20 */ 4534139c1837SPaolo Bonzini } 4535139c1837SPaolo Bonzini}; 4536139c1837SPaolo Bonzini#endif 4537139c1837SPaolo Bonzini 4538139c1837SPaolo Bonzini#if defined(ELF_HOST_MACHINE) 4539755bf9e5SRichard Hendersonvoid tcg_register_jit(const void *buf, size_t buf_size) 4540139c1837SPaolo Bonzini{ 4541139c1837SPaolo Bonzini tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4542139c1837SPaolo Bonzini} 4543139c1837SPaolo Bonzini#endif 4544