xref: /openbmc/qemu/tcg/i386/tcg-target.c.inc (revision 4ae7d11b70a840eec7aa27269093b15d04ebc84e)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

25b1ee3c67SRichard Henderson#include "../tcg-ldst.c.inc"
26139c1837SPaolo Bonzini#include "../tcg-pool.c.inc"
27139c1837SPaolo Bonzini
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable names of the host registers; only built when CONFIG_DEBUG_TCG
 * is defined.  The eight legacy general registers come first (64-bit or
 * 32-bit spelling depending on TCG_TARGET_REG_BITS), then %r8-%r15, then
 * the %xmm registers.  The %r8-%r15 entries are listed in both builds, so
 * %xmm0 is always entry 16.
 * NOTE(review): presumably indexed by TCGReg value -- confirm at the users.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
#if TCG_TARGET_REG_BITS == 64
    "%xmm8", "%xmm9", "%xmm10", "%xmm11",
    "%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif

44139c1837SPaolo Bonzinistatic const int tcg_target_reg_alloc_order[] = {
45139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
46139c1837SPaolo Bonzini    TCG_REG_RBP,
47139c1837SPaolo Bonzini    TCG_REG_RBX,
48139c1837SPaolo Bonzini    TCG_REG_R12,
49139c1837SPaolo Bonzini    TCG_REG_R13,
50139c1837SPaolo Bonzini    TCG_REG_R14,
51139c1837SPaolo Bonzini    TCG_REG_R15,
52139c1837SPaolo Bonzini    TCG_REG_R10,
53139c1837SPaolo Bonzini    TCG_REG_R11,
54139c1837SPaolo Bonzini    TCG_REG_R9,
55139c1837SPaolo Bonzini    TCG_REG_R8,
56139c1837SPaolo Bonzini    TCG_REG_RCX,
57139c1837SPaolo Bonzini    TCG_REG_RDX,
58139c1837SPaolo Bonzini    TCG_REG_RSI,
59139c1837SPaolo Bonzini    TCG_REG_RDI,
60139c1837SPaolo Bonzini    TCG_REG_RAX,
61139c1837SPaolo Bonzini#else
62139c1837SPaolo Bonzini    TCG_REG_EBX,
63139c1837SPaolo Bonzini    TCG_REG_ESI,
64139c1837SPaolo Bonzini    TCG_REG_EDI,
65139c1837SPaolo Bonzini    TCG_REG_EBP,
66139c1837SPaolo Bonzini    TCG_REG_ECX,
67139c1837SPaolo Bonzini    TCG_REG_EDX,
68139c1837SPaolo Bonzini    TCG_REG_EAX,
69139c1837SPaolo Bonzini#endif
70139c1837SPaolo Bonzini    TCG_REG_XMM0,
71139c1837SPaolo Bonzini    TCG_REG_XMM1,
72139c1837SPaolo Bonzini    TCG_REG_XMM2,
73139c1837SPaolo Bonzini    TCG_REG_XMM3,
74139c1837SPaolo Bonzini    TCG_REG_XMM4,
75139c1837SPaolo Bonzini    TCG_REG_XMM5,
76139c1837SPaolo Bonzini#ifndef _WIN64
77139c1837SPaolo Bonzini    /* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save
78139c1837SPaolo Bonzini       any of them.  Therefore only allow xmm0-xmm5 to be allocated.  */
79139c1837SPaolo Bonzini    TCG_REG_XMM6,
80139c1837SPaolo Bonzini    TCG_REG_XMM7,
81139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
82139c1837SPaolo Bonzini    TCG_REG_XMM8,
83139c1837SPaolo Bonzini    TCG_REG_XMM9,
84139c1837SPaolo Bonzini    TCG_REG_XMM10,
85139c1837SPaolo Bonzini    TCG_REG_XMM11,
86139c1837SPaolo Bonzini    TCG_REG_XMM12,
87139c1837SPaolo Bonzini    TCG_REG_XMM13,
88139c1837SPaolo Bonzini    TCG_REG_XMM14,
89139c1837SPaolo Bonzini    TCG_REG_XMM15,
90139c1837SPaolo Bonzini#endif
91139c1837SPaolo Bonzini#endif
92139c1837SPaolo Bonzini};
93139c1837SPaolo Bonzini
/*
 * Vector register named as the backend's vector temporary.
 * NOTE(review): its users and any reservation are outside this excerpt.
 */
#define TCG_TMP_VEC  TCG_REG_XMM5

/*
 * Registers that carry integer call arguments, in argument order:
 *   64-bit, _WIN64:  RCX, RDX, R8, R9
 *   64-bit, others:  RDI, RSI, RDX, RCX, R8, R9
 *   32-bit:          none (arguments are passed on the stack)
 */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

1145e3d0c19SRichard Hendersonstatic TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
1155e3d0c19SRichard Henderson{
1165e3d0c19SRichard Henderson    switch (kind) {
1175e3d0c19SRichard Henderson    case TCG_CALL_RET_NORMAL:
1185e3d0c19SRichard Henderson        tcg_debug_assert(slot >= 0 && slot <= 1);
1195e3d0c19SRichard Henderson        return slot ? TCG_REG_EDX : TCG_REG_EAX;
120c4f4a00aSRichard Henderson#ifdef _WIN64
121c4f4a00aSRichard Henderson    case TCG_CALL_RET_BY_VEC:
122c4f4a00aSRichard Henderson        tcg_debug_assert(slot == 0);
123c4f4a00aSRichard Henderson        return TCG_REG_XMM0;
124c4f4a00aSRichard Henderson#endif
1255e3d0c19SRichard Henderson    default:
1265e3d0c19SRichard Henderson        g_assert_not_reached();
1275e3d0c19SRichard Henderson    }
1285e3d0c19SRichard Henderson}
129139c1837SPaolo Bonzini
/*
 * Constants we accept.  Each is a distinct bit in an operand's constraint
 * mask; tcg_target_const_match() tests them against a candidate value.
 */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
#define TCG_CT_CONST_TST 0x1000
#define TCG_CT_CONST_ZERO 0x2000

/*
 * Registers used with the L constraint: the first two argument registers
 * on x86_64, and two arbitrary call-clobbered registers on i386.
 */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/*
 * Register-set bitmasks.  General registers occupy the low bits and
 * vector registers start at bit 16 in both builds.  ALL_BYTEL_REGS is the
 * whole general set on 64-bit and only the low four registers on 32-bit.
 */
#if TCG_TARGET_REG_BITS == 64
# define ALL_GENERAL_REGS      0x0000ffffu
# define ALL_VECTOR_REGS       0xffff0000u
# define ALL_BYTEL_REGS        ALL_GENERAL_REGS
#else
# define ALL_GENERAL_REGS      0x000000ffu
# define ALL_VECTOR_REGS       0x00ff0000u
# define ALL_BYTEL_REGS        0x0000000fu
#endif
/* TCG_REG_L0 and TCG_REG_L1 when tcg_use_softmmu is set, otherwise empty. */
#define SOFTMMU_RESERVE_REGS \
    (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0)

/* Host feature tests: non-zero when the matching bit is set in cpuinfo. */
#define have_bmi2       (cpuinfo & CPUINFO_BMI2)
#define have_lzcnt      (cpuinfo & CPUINFO_LZCNT)

164705ed477SRichard Hendersonstatic const tcg_insn_unit *tb_ret_addr;
165139c1837SPaolo Bonzini
166139c1837SPaolo Bonzinistatic bool patch_reloc(tcg_insn_unit *code_ptr, int type,
167139c1837SPaolo Bonzini                        intptr_t value, intptr_t addend)
168139c1837SPaolo Bonzini{
169139c1837SPaolo Bonzini    value += addend;
170139c1837SPaolo Bonzini    switch(type) {
171139c1837SPaolo Bonzini    case R_386_PC32:
172705ed477SRichard Henderson        value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
173139c1837SPaolo Bonzini        if (value != (int32_t)value) {
174139c1837SPaolo Bonzini            return false;
175139c1837SPaolo Bonzini        }
176139c1837SPaolo Bonzini        /* FALLTHRU */
177139c1837SPaolo Bonzini    case R_386_32:
178139c1837SPaolo Bonzini        tcg_patch32(code_ptr, value);
179139c1837SPaolo Bonzini        break;
180139c1837SPaolo Bonzini    case R_386_PC8:
181705ed477SRichard Henderson        value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
182139c1837SPaolo Bonzini        if (value != (int8_t)value) {
183139c1837SPaolo Bonzini            return false;
184139c1837SPaolo Bonzini        }
185139c1837SPaolo Bonzini        tcg_patch8(code_ptr, value);
186139c1837SPaolo Bonzini        break;
187139c1837SPaolo Bonzini    default:
188732e89f4SRichard Henderson        g_assert_not_reached();
189139c1837SPaolo Bonzini    }
190139c1837SPaolo Bonzini    return true;
191139c1837SPaolo Bonzini}
192139c1837SPaolo Bonzini
193139c1837SPaolo Bonzini/* test if a constant matches the constraint */
19421e9a8aeSRichard Hendersonstatic bool tcg_target_const_match(int64_t val, int ct,
19521e9a8aeSRichard Henderson                                   TCGType type, TCGCond cond, int vece)
196139c1837SPaolo Bonzini{
197139c1837SPaolo Bonzini    if (ct & TCG_CT_CONST) {
198139c1837SPaolo Bonzini        return 1;
199139c1837SPaolo Bonzini    }
200c7c778b5SRichard Henderson    if (type == TCG_TYPE_I32) {
201d3d1c30cSRichard Henderson        if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 |
202d3d1c30cSRichard Henderson                  TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) {
203c7c778b5SRichard Henderson            return 1;
204c7c778b5SRichard Henderson        }
205c7c778b5SRichard Henderson    } else {
206139c1837SPaolo Bonzini        if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
207139c1837SPaolo Bonzini            return 1;
208139c1837SPaolo Bonzini        }
209139c1837SPaolo Bonzini        if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
210139c1837SPaolo Bonzini            return 1;
211139c1837SPaolo Bonzini        }
212139c1837SPaolo Bonzini        if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
213139c1837SPaolo Bonzini            return 1;
214139c1837SPaolo Bonzini        }
215d3d1c30cSRichard Henderson        /*
216d3d1c30cSRichard Henderson         * This will be used in combination with TCG_CT_CONST_S32,
217d3d1c30cSRichard Henderson         * so "normal" TESTQ is already matched.  Also accept:
218d3d1c30cSRichard Henderson         *    TESTQ -> TESTL   (uint32_t)
219d3d1c30cSRichard Henderson         *    TESTQ -> BT      (is_power_of_2)
220d3d1c30cSRichard Henderson         */
221d3d1c30cSRichard Henderson        if ((ct & TCG_CT_CONST_TST)
222d3d1c30cSRichard Henderson            && is_tst_cond(cond)
223d3d1c30cSRichard Henderson            && (val == (uint32_t)val || is_power_of_2(val))) {
224d3d1c30cSRichard Henderson            return 1;
225d3d1c30cSRichard Henderson        }
226c7c778b5SRichard Henderson    }
227139c1837SPaolo Bonzini    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
228139c1837SPaolo Bonzini        return 1;
229139c1837SPaolo Bonzini    }
230d8387f0eSRichard Henderson    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
231d8387f0eSRichard Henderson        return 1;
232d8387f0eSRichard Henderson    }
233139c1837SPaolo Bonzini    return 0;
234139c1837SPaolo Bonzini}
235139c1837SPaolo Bonzini
/* Keep only the low three bits of a register number. */
#define LOWREGMASK(x)   ((x) & 7)

/*
 * Flag bits ORed into the OPC_* values, above the opcode byte, to request
 * prefixes, escape sequences and VEX/EVEX fields.  On 32-bit hosts the
 * REX-related flags and P_GS are defined as 0.
 */
#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#define P_VEXW          0x1000          /* Set VEX.W = 1 */
#if TCG_TARGET_REG_BITS == 64
# define P_REXW         P_VEXW          /* Set REX.W = 1; match VEXW */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_EXT3A         0x10000         /* 0x0f 0x3a opcode prefix */
#define P_SIMDF3        0x20000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x40000         /* 0xf2 opcode prefix */
#define P_VEXL          0x80000         /* Set VEX.L = 1 */
#define P_EVEX          0x100000        /* Requires EVEX encoding */

/*
 * Opcode table.  Each value is an opcode byte, optionally ORed with P_*
 * flags that select the prefixes and escapes the instruction needs.
 */
#define OPC_ARITH_EbIb   (0x80)
#define OPC_ARITH_EvIz   (0x81)
#define OPC_ARITH_EvIb   (0x83)
#define OPC_ARITH_GvEv   (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN         (0xf2 | P_EXT38)
#define OPC_ADD_GvEv     (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_AND_GvEv     (OPC_ARITH_GvEv | (ARITH_AND << 3))
#define OPC_BLENDPS      (0x0c | P_EXT3A | P_DATA16)
#define OPC_BSF          (0xbc | P_EXT)
#define OPC_BSR          (0xbd | P_EXT)
#define OPC_BSWAP        (0xc8 | P_EXT)
#define OPC_CALL_Jz      (0xe8)
#define OPC_CMOVCC       (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv     (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32      (0x48)
#define OPC_IMUL_GvEv    (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb  (0x6b)
#define OPC_IMUL_GvEvIz  (0x69)
#define OPC_INC_r32      (0x40)
#define OPC_JCC_long     (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short    (0x70)          /* ... plus condition code */
#define OPC_JMP_long     (0xe9)
#define OPC_JMP_short    (0xeb)
#define OPC_LEA          (0x8d)
#define OPC_LZCNT        (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv    (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv    (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv    (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz    (0xc6)
#define OPC_MOVL_EvIz    (0xc7)
#define OPC_MOVB_Ib      (0xb0)
#define OPC_MOVL_Iv      (0xb8)
#define OPC_MOVBE_GyMy   (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy   (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy    (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy    (0x7e | P_EXT | P_DATA16)
#define OPC_MOVDDUP      (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx  (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx  (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx  (0x6f | P_EXT | P_SIMDF3)
#define OPC_MOVDQU_WxVx  (0x7f | P_EXT | P_SIMDF3)
#define OPC_MOVQ_VqWq    (0x7e | P_EXT | P_SIMDF3)
#define OPC_MOVQ_WqVq    (0xd6 | P_EXT | P_DATA16)
#define OPC_MOVSBL       (0xbe | P_EXT)
#define OPC_MOVSWL       (0xbf | P_EXT)
#define OPC_MOVSLQ       (0x63 | P_REXW)
#define OPC_MOVZBL       (0xb6 | P_EXT)
#define OPC_MOVZWL       (0xb7 | P_EXT)
#define OPC_PABSB        (0x1c | P_EXT38 | P_DATA16)
#define OPC_PABSW        (0x1d | P_EXT38 | P_DATA16)
#define OPC_PABSD        (0x1e | P_EXT38 | P_DATA16)
#define OPC_VPABSQ       (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PACKSSDW     (0x6b | P_EXT | P_DATA16)
#define OPC_PACKSSWB     (0x63 | P_EXT | P_DATA16)
#define OPC_PACKUSDW     (0x2b | P_EXT38 | P_DATA16)
#define OPC_PACKUSWB     (0x67 | P_EXT | P_DATA16)
#define OPC_PADDB        (0xfc | P_EXT | P_DATA16)
#define OPC_PADDW        (0xfd | P_EXT | P_DATA16)
#define OPC_PADDD        (0xfe | P_EXT | P_DATA16)
#define OPC_PADDQ        (0xd4 | P_EXT | P_DATA16)
#define OPC_PADDSB       (0xec | P_EXT | P_DATA16)
#define OPC_PADDSW       (0xed | P_EXT | P_DATA16)
#define OPC_PADDUB       (0xdc | P_EXT | P_DATA16)
#define OPC_PADDUW       (0xdd | P_EXT | P_DATA16)
#define OPC_PAND         (0xdb | P_EXT | P_DATA16)
#define OPC_PANDN        (0xdf | P_EXT | P_DATA16)
#define OPC_PBLENDW      (0x0e | P_EXT3A | P_DATA16)
#define OPC_PCMPEQB      (0x74 | P_EXT | P_DATA16)
#define OPC_PCMPEQW      (0x75 | P_EXT | P_DATA16)
#define OPC_PCMPEQD      (0x76 | P_EXT | P_DATA16)
#define OPC_PCMPEQQ      (0x29 | P_EXT38 | P_DATA16)
#define OPC_PCMPGTB      (0x64 | P_EXT | P_DATA16)
#define OPC_PCMPGTW      (0x65 | P_EXT | P_DATA16)
#define OPC_PCMPGTD      (0x66 | P_EXT | P_DATA16)
#define OPC_PCMPGTQ      (0x37 | P_EXT38 | P_DATA16)
#define OPC_PEXTRD       (0x16 | P_EXT3A | P_DATA16)
#define OPC_PINSRD       (0x22 | P_EXT3A | P_DATA16)
#define OPC_PMAXSB       (0x3c | P_EXT38 | P_DATA16)
#define OPC_PMAXSW       (0xee | P_EXT | P_DATA16)
#define OPC_PMAXSD       (0x3d | P_EXT38 | P_DATA16)
#define OPC_VPMAXSQ      (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMAXUB       (0xde | P_EXT | P_DATA16)
#define OPC_PMAXUW       (0x3e | P_EXT38 | P_DATA16)
#define OPC_PMAXUD       (0x3f | P_EXT38 | P_DATA16)
#define OPC_VPMAXUQ      (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINSB       (0x38 | P_EXT38 | P_DATA16)
#define OPC_PMINSW       (0xea | P_EXT | P_DATA16)
#define OPC_PMINSD       (0x39 | P_EXT38 | P_DATA16)
#define OPC_VPMINSQ      (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINUB       (0xda | P_EXT | P_DATA16)
#define OPC_PMINUW       (0x3a | P_EXT38 | P_DATA16)
#define OPC_PMINUD       (0x3b | P_EXT38 | P_DATA16)
#define OPC_VPMINUQ      (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMOVSXBW     (0x20 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXWD     (0x23 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXDQ     (0x25 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXBW     (0x30 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXWD     (0x33 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXDQ     (0x35 | P_EXT38 | P_DATA16)
#define OPC_PMULLW       (0xd5 | P_EXT | P_DATA16)
#define OPC_PMULLD       (0x40 | P_EXT38 | P_DATA16)
#define OPC_VPMULLQ      (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_POR          (0xeb | P_EXT | P_DATA16)
#define OPC_PSHUFB       (0x00 | P_EXT38 | P_DATA16)
#define OPC_PSHUFD       (0x70 | P_EXT | P_DATA16)
#define OPC_PSHUFLW      (0x70 | P_EXT | P_SIMDF2)
#define OPC_PSHUFHW      (0x70 | P_EXT | P_SIMDF3)
#define OPC_PSHIFTW_Ib   (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTD_Ib   (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
#define OPC_PSHIFTQ_Ib   (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSLLW        (0xf1 | P_EXT | P_DATA16)
#define OPC_PSLLD        (0xf2 | P_EXT | P_DATA16)
#define OPC_PSLLQ        (0xf3 | P_EXT | P_DATA16)
#define OPC_PSRAW        (0xe1 | P_EXT | P_DATA16)
#define OPC_PSRAD        (0xe2 | P_EXT | P_DATA16)
#define OPC_VPSRAQ       (0xe2 | P_EXT | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PSRLW        (0xd1 | P_EXT | P_DATA16)
#define OPC_PSRLD        (0xd2 | P_EXT | P_DATA16)
#define OPC_PSRLQ        (0xd3 | P_EXT | P_DATA16)
#define OPC_PSUBB        (0xf8 | P_EXT | P_DATA16)
#define OPC_PSUBW        (0xf9 | P_EXT | P_DATA16)
#define OPC_PSUBD        (0xfa | P_EXT | P_DATA16)
#define OPC_PSUBQ        (0xfb | P_EXT | P_DATA16)
#define OPC_PSUBSB       (0xe8 | P_EXT | P_DATA16)
#define OPC_PSUBSW       (0xe9 | P_EXT | P_DATA16)
#define OPC_PSUBUB       (0xd8 | P_EXT | P_DATA16)
#define OPC_PSUBUW       (0xd9 | P_EXT | P_DATA16)
#define OPC_PUNPCKLBW    (0x60 | P_EXT | P_DATA16)
#define OPC_PUNPCKLWD    (0x61 | P_EXT | P_DATA16)
#define OPC_PUNPCKLDQ    (0x62 | P_EXT | P_DATA16)
#define OPC_PUNPCKLQDQ   (0x6c | P_EXT | P_DATA16)
#define OPC_PUNPCKHBW    (0x68 | P_EXT | P_DATA16)
#define OPC_PUNPCKHWD    (0x69 | P_EXT | P_DATA16)
#define OPC_PUNPCKHDQ    (0x6a | P_EXT | P_DATA16)
#define OPC_PUNPCKHQDQ   (0x6d | P_EXT | P_DATA16)
#define OPC_PXOR         (0xef | P_EXT | P_DATA16)
#define OPC_POP_r32      (0x58)
#define OPC_POPCNT       (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32     (0x50)
#define OPC_PUSH_Iv      (0x68)
#define OPC_PUSH_Ib      (0x6a)
#define OPC_RET          (0xc3)
#define OPC_SETCC        (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1      (0xd1)
#define OPC_SHIFT_Ib     (0xc1)
#define OPC_SHIFT_cl     (0xd3)
#define OPC_SARX         (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHUFPS       (0xc6 | P_EXT)
#define OPC_SHLX         (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX         (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_SHRD_Ib      (0xac | P_EXT)
#define OPC_TESTB        (0x84)
#define OPC_TESTL        (0x85)
#define OPC_TZCNT        (0xbc | P_EXT | P_SIMDF3)
#define OPC_UD2          (0x0b | P_EXT)
#define OPC_VPBLENDD     (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB    (0x4c | P_EXT3A | P_DATA16)
#define OPC_VPBLENDMB    (0x66 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPBLENDMW    (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPBLENDMD    (0x64 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPBLENDMQ    (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPB       (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPUB      (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPW       (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPUW      (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPD       (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPUD      (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPCMPQ       (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPCMPUQ      (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPINSRB      (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW      (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPMOVM2B     (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPMOVM2W     (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPMOVM2D     (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPMOVM2Q     (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPERMQ       (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128   (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPROLVD      (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPROLVQ      (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPRORVD      (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPRORVQ      (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDW      (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDD      (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPSHLDQ      (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDVW     (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDVD     (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHLDVQ     (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVW     (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVD     (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHRDVQ     (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVW      (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVD      (0x47 | P_EXT38 | P_DATA16)
#define OPC_VPSLLVQ      (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPSRAVW      (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRAVD      (0x46 | P_EXT38 | P_DATA16)
#define OPC_VPSRAVQ      (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVW      (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVD      (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ      (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPTERNLOGQ   (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPTESTMB     (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPTESTMW     (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPTESTMD     (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPTESTMQ     (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPTESTNMB    (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPTESTNMW    (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPTESTNMD    (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
#define OPC_VPTESTNMQ    (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VZEROUPPER   (0x77 | P_EXT)
#define OPC_XCHG_ax_r32  (0x90)
#define OPC_XCHG_EvGv    (0x87)

/* Group opcodes; the operation is chosen by an opcode extension. */
#define OPC_GRP3_Eb     (0xf6)
#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)
#define OPC_GRP14       (0x73 | P_EXT | P_DATA16)
#define OPC_GRPBT       (0xba | P_EXT)

/* Opcode extensions for OPC_GRPBT. */
#define OPC_GRPBT_BT    4
#define OPC_GRPBT_BTS   5
#define OPC_GRPBT_BTR   6
#define OPC_GRPBT_BTC   7

/*
 * Group 1 opcode extensions for 0x80-0x83.
 * These are also used as modifiers for OPC_ARITH.
 */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

499139c1837SPaolo Bonzini/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
/* Only the rotates and shifts listed are named; sub-opcodes 2, 3 and 6
   have no symbol here.  */
500139c1837SPaolo Bonzini#define SHIFT_ROL 0
501139c1837SPaolo Bonzini#define SHIFT_ROR 1
502139c1837SPaolo Bonzini#define SHIFT_SHL 4
503139c1837SPaolo Bonzini#define SHIFT_SHR 5
504139c1837SPaolo Bonzini#define SHIFT_SAR 7
505139c1837SPaolo Bonzini
506139c1837SPaolo Bonzini/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3_Eb
   (0xf6) and OPC_GRP3_Ev (0xf7).  Sub-opcode 1 has no symbol here.  */
507b1ee3c67SRichard Henderson#define EXT3_TESTi 0
508139c1837SPaolo Bonzini#define EXT3_NOT   2
509139c1837SPaolo Bonzini#define EXT3_NEG   3
510139c1837SPaolo Bonzini#define EXT3_MUL   4
511139c1837SPaolo Bonzini#define EXT3_IMUL  5
512139c1837SPaolo Bonzini#define EXT3_DIV   6
513139c1837SPaolo Bonzini#define EXT3_IDIV  7
514139c1837SPaolo Bonzini
515139c1837SPaolo Bonzini/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
/* Only increment, decrement and the near indirect call/jump are named;
   sub-opcodes 3 and 5-7 have no symbol here.  */
516139c1837SPaolo Bonzini#define EXT5_INC_Ev	0
517139c1837SPaolo Bonzini#define EXT5_DEC_Ev	1
518139c1837SPaolo Bonzini#define EXT5_CALLN_Ev	2
519139c1837SPaolo Bonzini#define EXT5_JMPN_Ev	4
520139c1837SPaolo Bonzini
521139c1837SPaolo Bonzini/* Condition codes to be added to OPC_JCC_{long,short}.  */
/* JCC_JMP is the only negative value and so cannot be one of the 4-bit
   codes 0x0-0xf below -- presumably a sentinel requesting an unconditional
   jump; confirm at the use sites.  */
522139c1837SPaolo Bonzini#define JCC_JMP (-1)
523139c1837SPaolo Bonzini#define JCC_JO  0x0
524139c1837SPaolo Bonzini#define JCC_JNO 0x1
525139c1837SPaolo Bonzini#define JCC_JB  0x2
526139c1837SPaolo Bonzini#define JCC_JAE 0x3
527139c1837SPaolo Bonzini#define JCC_JE  0x4
528139c1837SPaolo Bonzini#define JCC_JNE 0x5
529139c1837SPaolo Bonzini#define JCC_JBE 0x6
530139c1837SPaolo Bonzini#define JCC_JA  0x7
531139c1837SPaolo Bonzini#define JCC_JS  0x8
532139c1837SPaolo Bonzini#define JCC_JNS 0x9
533139c1837SPaolo Bonzini#define JCC_JP  0xa
534139c1837SPaolo Bonzini#define JCC_JNP 0xb
535139c1837SPaolo Bonzini#define JCC_JL  0xc
536139c1837SPaolo Bonzini#define JCC_JGE 0xd
537139c1837SPaolo Bonzini#define JCC_JLE 0xe
538139c1837SPaolo Bonzini#define JCC_JG  0xf
539139c1837SPaolo Bonzini
/* Map a TCGCond to the JCC_* condition code that tests it.
   Signed comparisons use the L/GE/LE/G codes, unsigned ones the
   B/AE/BE/A codes.  TSTEQ/TSTNE share JE/JNE with EQ/NE -- presumably
   because the instruction emitted for them leaves the answer in ZF;
   confirm at the emitters.
   Any TCGCond index not listed below is zero-initialized, which is
   numerically JCC_JO, so callers must only index with the listed
   conditions.  */
540139c1837SPaolo Bonzinistatic const uint8_t tcg_cond_to_jcc[] = {
541139c1837SPaolo Bonzini    [TCG_COND_EQ] = JCC_JE,
542139c1837SPaolo Bonzini    [TCG_COND_NE] = JCC_JNE,
543139c1837SPaolo Bonzini    [TCG_COND_LT] = JCC_JL,
544139c1837SPaolo Bonzini    [TCG_COND_GE] = JCC_JGE,
545139c1837SPaolo Bonzini    [TCG_COND_LE] = JCC_JLE,
546139c1837SPaolo Bonzini    [TCG_COND_GT] = JCC_JG,
547139c1837SPaolo Bonzini    [TCG_COND_LTU] = JCC_JB,
548139c1837SPaolo Bonzini    [TCG_COND_GEU] = JCC_JAE,
549139c1837SPaolo Bonzini    [TCG_COND_LEU] = JCC_JBE,
550139c1837SPaolo Bonzini    [TCG_COND_GTU] = JCC_JA,
551303214aaSRichard Henderson    [TCG_COND_TSTEQ] = JCC_JE,
552303214aaSRichard Henderson    [TCG_COND_TSTNE] = JCC_JNE,
553139c1837SPaolo Bonzini};
554139c1837SPaolo Bonzini
555139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
556139c1837SPaolo Bonzinistatic void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
557139c1837SPaolo Bonzini{
558139c1837SPaolo Bonzini    int rex;
559139c1837SPaolo Bonzini
560139c1837SPaolo Bonzini    if (opc & P_GS) {
561139c1837SPaolo Bonzini        tcg_out8(s, 0x65);
562139c1837SPaolo Bonzini    }
563139c1837SPaolo Bonzini    if (opc & P_DATA16) {
564139c1837SPaolo Bonzini        /* We should never be asking for both 16 and 64-bit operation.  */
565139c1837SPaolo Bonzini        tcg_debug_assert((opc & P_REXW) == 0);
566139c1837SPaolo Bonzini        tcg_out8(s, 0x66);
567139c1837SPaolo Bonzini    }
568139c1837SPaolo Bonzini    if (opc & P_SIMDF3) {
569139c1837SPaolo Bonzini        tcg_out8(s, 0xf3);
570139c1837SPaolo Bonzini    } else if (opc & P_SIMDF2) {
571139c1837SPaolo Bonzini        tcg_out8(s, 0xf2);
572139c1837SPaolo Bonzini    }
573139c1837SPaolo Bonzini
574139c1837SPaolo Bonzini    rex = 0;
575139c1837SPaolo Bonzini    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
576139c1837SPaolo Bonzini    rex |= (r & 8) >> 1;                /* REX.R */
577139c1837SPaolo Bonzini    rex |= (x & 8) >> 2;                /* REX.X */
578139c1837SPaolo Bonzini    rex |= (rm & 8) >> 3;               /* REX.B */
579139c1837SPaolo Bonzini
580139c1837SPaolo Bonzini    /* P_REXB_{R,RM} indicates that the given register is the low byte.
581139c1837SPaolo Bonzini       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
582139c1837SPaolo Bonzini       as otherwise the encoding indicates %[abcd]h.  Note that the values
583139c1837SPaolo Bonzini       that are ORed in merely indicate that the REX byte must be present;
584139c1837SPaolo Bonzini       those bits get discarded in output.  */
585139c1837SPaolo Bonzini    rex |= opc & (r >= 4 ? P_REXB_R : 0);
586139c1837SPaolo Bonzini    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
587139c1837SPaolo Bonzini
588139c1837SPaolo Bonzini    if (rex) {
589139c1837SPaolo Bonzini        tcg_out8(s, (uint8_t)(rex | 0x40));
590139c1837SPaolo Bonzini    }
591139c1837SPaolo Bonzini
592139c1837SPaolo Bonzini    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
593139c1837SPaolo Bonzini        tcg_out8(s, 0x0f);
594139c1837SPaolo Bonzini        if (opc & P_EXT38) {
595139c1837SPaolo Bonzini            tcg_out8(s, 0x38);
596139c1837SPaolo Bonzini        } else if (opc & P_EXT3A) {
597139c1837SPaolo Bonzini            tcg_out8(s, 0x3a);
598139c1837SPaolo Bonzini        }
599139c1837SPaolo Bonzini    }
600139c1837SPaolo Bonzini
601139c1837SPaolo Bonzini    tcg_out8(s, opc);
602139c1837SPaolo Bonzini}
603139c1837SPaolo Bonzini#else
604139c1837SPaolo Bonzinistatic void tcg_out_opc(TCGContext *s, int opc)
605139c1837SPaolo Bonzini{
606139c1837SPaolo Bonzini    if (opc & P_DATA16) {
607139c1837SPaolo Bonzini        tcg_out8(s, 0x66);
608139c1837SPaolo Bonzini    }
609139c1837SPaolo Bonzini    if (opc & P_SIMDF3) {
610139c1837SPaolo Bonzini        tcg_out8(s, 0xf3);
611139c1837SPaolo Bonzini    } else if (opc & P_SIMDF2) {
612139c1837SPaolo Bonzini        tcg_out8(s, 0xf2);
613139c1837SPaolo Bonzini    }
614139c1837SPaolo Bonzini    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
615139c1837SPaolo Bonzini        tcg_out8(s, 0x0f);
616139c1837SPaolo Bonzini        if (opc & P_EXT38) {
617139c1837SPaolo Bonzini            tcg_out8(s, 0x38);
618139c1837SPaolo Bonzini        } else if (opc & P_EXT3A) {
619139c1837SPaolo Bonzini            tcg_out8(s, 0x3a);
620139c1837SPaolo Bonzini        }
621139c1837SPaolo Bonzini    }
622139c1837SPaolo Bonzini    tcg_out8(s, opc);
623139c1837SPaolo Bonzini}
624139c1837SPaolo Bonzini/* Discard the register arguments to tcg_out_opc early, so as not to penalize
625139c1837SPaolo Bonzini   the 32-bit compilation paths.  This method works with all versions of gcc,
626139c1837SPaolo Bonzini   whereas relying on optimization may not be able to exclude them.  */
627139c1837SPaolo Bonzini#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
628139c1837SPaolo Bonzini#endif
629139c1837SPaolo Bonzini
630139c1837SPaolo Bonzinistatic void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
631139c1837SPaolo Bonzini{
632139c1837SPaolo Bonzini    tcg_out_opc(s, opc, r, rm, 0);
633139c1837SPaolo Bonzini    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
634139c1837SPaolo Bonzini}
635139c1837SPaolo Bonzini
636139c1837SPaolo Bonzinistatic void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
637139c1837SPaolo Bonzini                            int rm, int index)
638139c1837SPaolo Bonzini{
639139c1837SPaolo Bonzini    int tmp;
640139c1837SPaolo Bonzini
641d3b41127SRichard Henderson    if (opc & P_GS) {
642d3b41127SRichard Henderson        tcg_out8(s, 0x65);
643d3b41127SRichard Henderson    }
644139c1837SPaolo Bonzini    /* Use the two byte form if possible, which cannot encode
645139c1837SPaolo Bonzini       VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT.  */
646fc88a523SRichard Henderson    if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT
647139c1837SPaolo Bonzini        && ((rm | index) & 8) == 0) {
648139c1837SPaolo Bonzini        /* Two byte VEX prefix.  */
649139c1837SPaolo Bonzini        tcg_out8(s, 0xc5);
650139c1837SPaolo Bonzini
651139c1837SPaolo Bonzini        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R */
652139c1837SPaolo Bonzini    } else {
653139c1837SPaolo Bonzini        /* Three byte VEX prefix.  */
654139c1837SPaolo Bonzini        tcg_out8(s, 0xc4);
655139c1837SPaolo Bonzini
656139c1837SPaolo Bonzini        /* VEX.m-mmmm */
657139c1837SPaolo Bonzini        if (opc & P_EXT3A) {
658139c1837SPaolo Bonzini            tmp = 3;
659139c1837SPaolo Bonzini        } else if (opc & P_EXT38) {
660139c1837SPaolo Bonzini            tmp = 2;
661139c1837SPaolo Bonzini        } else if (opc & P_EXT) {
662139c1837SPaolo Bonzini            tmp = 1;
663139c1837SPaolo Bonzini        } else {
664139c1837SPaolo Bonzini            g_assert_not_reached();
665139c1837SPaolo Bonzini        }
666139c1837SPaolo Bonzini        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R */
667139c1837SPaolo Bonzini        tmp |= (index & 8 ? 0 : 0x40);         /* VEX.X */
668139c1837SPaolo Bonzini        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B */
669139c1837SPaolo Bonzini        tcg_out8(s, tmp);
670139c1837SPaolo Bonzini
671fc88a523SRichard Henderson        tmp = (opc & P_VEXW ? 0x80 : 0);       /* VEX.W */
672139c1837SPaolo Bonzini    }
673139c1837SPaolo Bonzini
674139c1837SPaolo Bonzini    tmp |= (opc & P_VEXL ? 0x04 : 0);      /* VEX.L */
675139c1837SPaolo Bonzini    /* VEX.pp */
676139c1837SPaolo Bonzini    if (opc & P_DATA16) {
677139c1837SPaolo Bonzini        tmp |= 1;                          /* 0x66 */
678139c1837SPaolo Bonzini    } else if (opc & P_SIMDF3) {
679139c1837SPaolo Bonzini        tmp |= 2;                          /* 0xf3 */
680139c1837SPaolo Bonzini    } else if (opc & P_SIMDF2) {
681139c1837SPaolo Bonzini        tmp |= 3;                          /* 0xf2 */
682139c1837SPaolo Bonzini    }
683139c1837SPaolo Bonzini    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
684139c1837SPaolo Bonzini    tcg_out8(s, tmp);
685139c1837SPaolo Bonzini    tcg_out8(s, opc);
686139c1837SPaolo Bonzini}
687139c1837SPaolo Bonzini
68808b032f7SRichard Hendersonstatic void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
689c044ec0dSRichard Henderson                             int rm, int index, int aaa, bool z)
69008b032f7SRichard Henderson{
69108b032f7SRichard Henderson    /* The entire 4-byte evex prefix; with R' and V' set. */
69208b032f7SRichard Henderson    uint32_t p = 0x08041062;
69308b032f7SRichard Henderson    int mm, pp;
69408b032f7SRichard Henderson
69508b032f7SRichard Henderson    tcg_debug_assert(have_avx512vl);
69608b032f7SRichard Henderson
69708b032f7SRichard Henderson    /* EVEX.mm */
69808b032f7SRichard Henderson    if (opc & P_EXT3A) {
69908b032f7SRichard Henderson        mm = 3;
70008b032f7SRichard Henderson    } else if (opc & P_EXT38) {
70108b032f7SRichard Henderson        mm = 2;
70208b032f7SRichard Henderson    } else if (opc & P_EXT) {
70308b032f7SRichard Henderson        mm = 1;
70408b032f7SRichard Henderson    } else {
70508b032f7SRichard Henderson        g_assert_not_reached();
70608b032f7SRichard Henderson    }
70708b032f7SRichard Henderson
70808b032f7SRichard Henderson    /* EVEX.pp */
70908b032f7SRichard Henderson    if (opc & P_DATA16) {
71008b032f7SRichard Henderson        pp = 1;                          /* 0x66 */
71108b032f7SRichard Henderson    } else if (opc & P_SIMDF3) {
71208b032f7SRichard Henderson        pp = 2;                          /* 0xf3 */
71308b032f7SRichard Henderson    } else if (opc & P_SIMDF2) {
71408b032f7SRichard Henderson        pp = 3;                          /* 0xf2 */
71508b032f7SRichard Henderson    } else {
71608b032f7SRichard Henderson        pp = 0;
71708b032f7SRichard Henderson    }
71808b032f7SRichard Henderson
71908b032f7SRichard Henderson    p = deposit32(p, 8, 2, mm);
72008b032f7SRichard Henderson    p = deposit32(p, 13, 1, (rm & 8) == 0);             /* EVEX.RXB.B */
72108b032f7SRichard Henderson    p = deposit32(p, 14, 1, (index & 8) == 0);          /* EVEX.RXB.X */
72208b032f7SRichard Henderson    p = deposit32(p, 15, 1, (r & 8) == 0);              /* EVEX.RXB.R */
72308b032f7SRichard Henderson    p = deposit32(p, 16, 2, pp);
72408b032f7SRichard Henderson    p = deposit32(p, 19, 4, ~v);
72508b032f7SRichard Henderson    p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
726c044ec0dSRichard Henderson    p = deposit32(p, 24, 3, aaa);
72708b032f7SRichard Henderson    p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
728c044ec0dSRichard Henderson    p = deposit32(p, 31, 1, z);
72908b032f7SRichard Henderson
73008b032f7SRichard Henderson    tcg_out32(s, p);
73108b032f7SRichard Henderson    tcg_out8(s, opc);
73208b032f7SRichard Henderson}
73308b032f7SRichard Henderson
734139c1837SPaolo Bonzinistatic void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
735139c1837SPaolo Bonzini{
73608b032f7SRichard Henderson    if (opc & P_EVEX) {
737c044ec0dSRichard Henderson        tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false);
73808b032f7SRichard Henderson    } else {
739139c1837SPaolo Bonzini        tcg_out_vex_opc(s, opc, r, v, rm, 0);
74008b032f7SRichard Henderson    }
741139c1837SPaolo Bonzini    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
742139c1837SPaolo Bonzini}
743139c1837SPaolo Bonzini
744bc97b3adSRichard Hendersonstatic void tcg_out_vex_modrm_type(TCGContext *s, int opc,
745bc97b3adSRichard Henderson                                   int r, int v, int rm, TCGType type)
746bc97b3adSRichard Henderson{
747bc97b3adSRichard Henderson    if (type == TCG_TYPE_V256) {
748bc97b3adSRichard Henderson        opc |= P_VEXL;
749bc97b3adSRichard Henderson    }
750bc97b3adSRichard Henderson    tcg_out_vex_modrm(s, opc, r, v, rm);
751bc97b3adSRichard Henderson}
752bc97b3adSRichard Henderson
753d5896749SRichard Hendersonstatic void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v,
754d5896749SRichard Henderson                                    int rm, int aaa, bool z, TCGType type)
755d5896749SRichard Henderson{
756d5896749SRichard Henderson    if (type == TCG_TYPE_V256) {
757d5896749SRichard Henderson        opc |= P_VEXL;
758d5896749SRichard Henderson    }
759d5896749SRichard Henderson    tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z);
760d5896749SRichard Henderson    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
761d5896749SRichard Henderson}
762d5896749SRichard Henderson
763139c1837SPaolo Bonzini/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
764139c1837SPaolo Bonzini   We handle either RM and INDEX missing with a negative value.  In 64-bit
765139c1837SPaolo Bonzini   mode for absolute addresses, ~RM is the size of the immediate operand
766139c1837SPaolo Bonzini   that will follow the instruction.  */
767139c1837SPaolo Bonzini
768139c1837SPaolo Bonzinistatic void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
769139c1837SPaolo Bonzini                               int shift, intptr_t offset)
770139c1837SPaolo Bonzini{
771139c1837SPaolo Bonzini    int mod, len;
772139c1837SPaolo Bonzini
773139c1837SPaolo Bonzini    if (index < 0 && rm < 0) {
774139c1837SPaolo Bonzini        if (TCG_TARGET_REG_BITS == 64) {
775139c1837SPaolo Bonzini            /* Try for a rip-relative addressing mode.  This has replaced
776139c1837SPaolo Bonzini               the 32-bit-mode absolute addressing encoding.  */
777139c1837SPaolo Bonzini            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
778139c1837SPaolo Bonzini            intptr_t disp = offset - pc;
779139c1837SPaolo Bonzini            if (disp == (int32_t)disp) {
780139c1837SPaolo Bonzini                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
781139c1837SPaolo Bonzini                tcg_out32(s, disp);
782139c1837SPaolo Bonzini                return;
783139c1837SPaolo Bonzini            }
784139c1837SPaolo Bonzini
785139c1837SPaolo Bonzini            /* Try for an absolute address encoding.  This requires the
786139c1837SPaolo Bonzini               use of the MODRM+SIB encoding and is therefore larger than
787139c1837SPaolo Bonzini               rip-relative addressing.  */
788139c1837SPaolo Bonzini            if (offset == (int32_t)offset) {
789139c1837SPaolo Bonzini                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
790139c1837SPaolo Bonzini                tcg_out8(s, (4 << 3) | 5);
791139c1837SPaolo Bonzini                tcg_out32(s, offset);
792139c1837SPaolo Bonzini                return;
793139c1837SPaolo Bonzini            }
794139c1837SPaolo Bonzini
795139c1837SPaolo Bonzini            /* ??? The memory isn't directly addressable.  */
796139c1837SPaolo Bonzini            g_assert_not_reached();
797139c1837SPaolo Bonzini        } else {
798139c1837SPaolo Bonzini            /* Absolute address.  */
799139c1837SPaolo Bonzini            tcg_out8(s, (r << 3) | 5);
800139c1837SPaolo Bonzini            tcg_out32(s, offset);
801139c1837SPaolo Bonzini            return;
802139c1837SPaolo Bonzini        }
803139c1837SPaolo Bonzini    }
804139c1837SPaolo Bonzini
805139c1837SPaolo Bonzini    /* Find the length of the immediate addend.  Note that the encoding
806139c1837SPaolo Bonzini       that would be used for (%ebp) indicates absolute addressing.  */
807139c1837SPaolo Bonzini    if (rm < 0) {
808139c1837SPaolo Bonzini        mod = 0, len = 4, rm = 5;
809139c1837SPaolo Bonzini    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
810139c1837SPaolo Bonzini        mod = 0, len = 0;
811139c1837SPaolo Bonzini    } else if (offset == (int8_t)offset) {
812139c1837SPaolo Bonzini        mod = 0x40, len = 1;
813139c1837SPaolo Bonzini    } else {
814139c1837SPaolo Bonzini        mod = 0x80, len = 4;
815139c1837SPaolo Bonzini    }
816139c1837SPaolo Bonzini
817139c1837SPaolo Bonzini    /* Use a single byte MODRM format if possible.  Note that the encoding
818139c1837SPaolo Bonzini       that would be used for %esp is the escape to the two byte form.  */
819139c1837SPaolo Bonzini    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
820139c1837SPaolo Bonzini        /* Single byte MODRM format.  */
821139c1837SPaolo Bonzini        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
822139c1837SPaolo Bonzini    } else {
823139c1837SPaolo Bonzini        /* Two byte MODRM+SIB format.  */
824139c1837SPaolo Bonzini
825139c1837SPaolo Bonzini        /* Note that the encoding that would place %esp into the index
826139c1837SPaolo Bonzini           field indicates no index register.  In 64-bit mode, the REX.X
827139c1837SPaolo Bonzini           bit counts, so %r12 can be used as the index.  */
828139c1837SPaolo Bonzini        if (index < 0) {
829139c1837SPaolo Bonzini            index = 4;
830139c1837SPaolo Bonzini        } else {
831139c1837SPaolo Bonzini            tcg_debug_assert(index != TCG_REG_ESP);
832139c1837SPaolo Bonzini        }
833139c1837SPaolo Bonzini
834139c1837SPaolo Bonzini        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
835139c1837SPaolo Bonzini        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
836139c1837SPaolo Bonzini    }
837139c1837SPaolo Bonzini
838139c1837SPaolo Bonzini    if (len == 1) {
839139c1837SPaolo Bonzini        tcg_out8(s, offset);
840139c1837SPaolo Bonzini    } else if (len == 4) {
841139c1837SPaolo Bonzini        tcg_out32(s, offset);
842139c1837SPaolo Bonzini    }
843139c1837SPaolo Bonzini}
844139c1837SPaolo Bonzini
845139c1837SPaolo Bonzinistatic void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
846139c1837SPaolo Bonzini                                     int index, int shift, intptr_t offset)
847139c1837SPaolo Bonzini{
848139c1837SPaolo Bonzini    tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
849139c1837SPaolo Bonzini    tcg_out_sib_offset(s, r, rm, index, shift, offset);
850139c1837SPaolo Bonzini}
851139c1837SPaolo Bonzini
852139c1837SPaolo Bonzinistatic void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
853139c1837SPaolo Bonzini                                         int rm, int index, int shift,
854139c1837SPaolo Bonzini                                         intptr_t offset)
855139c1837SPaolo Bonzini{
856139c1837SPaolo Bonzini    tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
857139c1837SPaolo Bonzini    tcg_out_sib_offset(s, r, rm, index, shift, offset);
858139c1837SPaolo Bonzini}
859139c1837SPaolo Bonzini
860139c1837SPaolo Bonzini/* A simplification of the above with no index or shift.  */
861139c1837SPaolo Bonzinistatic inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
862139c1837SPaolo Bonzini                                        int rm, intptr_t offset)
863139c1837SPaolo Bonzini{
864139c1837SPaolo Bonzini    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
865139c1837SPaolo Bonzini}
866139c1837SPaolo Bonzini
867139c1837SPaolo Bonzinistatic inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
868139c1837SPaolo Bonzini                                            int v, int rm, intptr_t offset)
869139c1837SPaolo Bonzini{
870139c1837SPaolo Bonzini    tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
871139c1837SPaolo Bonzini}
872139c1837SPaolo Bonzini
873139c1837SPaolo Bonzini/* Output an opcode with an expected reference to the constant pool.  */
874139c1837SPaolo Bonzinistatic inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
875139c1837SPaolo Bonzini{
876139c1837SPaolo Bonzini    tcg_out_opc(s, opc, r, 0, 0);
877139c1837SPaolo Bonzini    /* Absolute for 32-bit, pc-relative for 64-bit.  */
878139c1837SPaolo Bonzini    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
879139c1837SPaolo Bonzini    tcg_out32(s, 0);
880139c1837SPaolo Bonzini}
881139c1837SPaolo Bonzini
882139c1837SPaolo Bonzini/* Output an opcode with an expected reference to the constant pool.  */
883139c1837SPaolo Bonzinistatic inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
884139c1837SPaolo Bonzini{
885139c1837SPaolo Bonzini    tcg_out_vex_opc(s, opc, r, 0, 0, 0);
886139c1837SPaolo Bonzini    /* Absolute for 32-bit, pc-relative for 64-bit.  */
887139c1837SPaolo Bonzini    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
888139c1837SPaolo Bonzini    tcg_out32(s, 0);
889139c1837SPaolo Bonzini}
890139c1837SPaolo Bonzini
891139c1837SPaolo Bonzini/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
892139c1837SPaolo Bonzinistatic inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
893139c1837SPaolo Bonzini{
894139c1837SPaolo Bonzini    /* Propagate an opcode prefix, such as P_REXW.  */
895139c1837SPaolo Bonzini    int ext = subop & ~0x7;
896139c1837SPaolo Bonzini    subop &= 0x7;
897139c1837SPaolo Bonzini
898139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
899139c1837SPaolo Bonzini}
900139c1837SPaolo Bonzini
901139c1837SPaolo Bonzinistatic bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
902139c1837SPaolo Bonzini{
903139c1837SPaolo Bonzini    int rexw = 0;
904139c1837SPaolo Bonzini
905139c1837SPaolo Bonzini    if (arg == ret) {
906139c1837SPaolo Bonzini        return true;
907139c1837SPaolo Bonzini    }
908139c1837SPaolo Bonzini    switch (type) {
909139c1837SPaolo Bonzini    case TCG_TYPE_I64:
910139c1837SPaolo Bonzini        rexw = P_REXW;
911139c1837SPaolo Bonzini        /* fallthru */
912139c1837SPaolo Bonzini    case TCG_TYPE_I32:
913139c1837SPaolo Bonzini        if (ret < 16) {
914139c1837SPaolo Bonzini            if (arg < 16) {
915139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
916139c1837SPaolo Bonzini            } else {
917139c1837SPaolo Bonzini                tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
918139c1837SPaolo Bonzini            }
919139c1837SPaolo Bonzini        } else {
920139c1837SPaolo Bonzini            if (arg < 16) {
921139c1837SPaolo Bonzini                tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
922139c1837SPaolo Bonzini            } else {
923139c1837SPaolo Bonzini                tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
924139c1837SPaolo Bonzini            }
925139c1837SPaolo Bonzini        }
926139c1837SPaolo Bonzini        break;
927139c1837SPaolo Bonzini
928139c1837SPaolo Bonzini    case TCG_TYPE_V64:
929139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16 && arg >= 16);
930139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
931139c1837SPaolo Bonzini        break;
932139c1837SPaolo Bonzini    case TCG_TYPE_V128:
933139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16 && arg >= 16);
934139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
935139c1837SPaolo Bonzini        break;
936139c1837SPaolo Bonzini    case TCG_TYPE_V256:
937139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16 && arg >= 16);
938139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
939139c1837SPaolo Bonzini        break;
940139c1837SPaolo Bonzini
941139c1837SPaolo Bonzini    default:
942139c1837SPaolo Bonzini        g_assert_not_reached();
943139c1837SPaolo Bonzini    }
944139c1837SPaolo Bonzini    return true;
945139c1837SPaolo Bonzini}
946139c1837SPaolo Bonzini
/* Broadcast opcodes used when have_avx2 is set, indexed by the VECE
   argument of tcg_out_dup_vec() and tcg_out_dupm_vec().  The order is
   byte, word, dword, qword -- assumes MO_8..MO_64 are 0..3; confirm
   against the MemOp definition.  */
947139c1837SPaolo Bonzinistatic const int avx2_dup_insn[4] = {
948139c1837SPaolo Bonzini    OPC_VPBROADCASTB, OPC_VPBROADCASTW,
949139c1837SPaolo Bonzini    OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
950139c1837SPaolo Bonzini};
951139c1837SPaolo Bonzini
952139c1837SPaolo Bonzinistatic bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
953139c1837SPaolo Bonzini                            TCGReg r, TCGReg a)
954139c1837SPaolo Bonzini{
955139c1837SPaolo Bonzini    if (have_avx2) {
956bc97b3adSRichard Henderson        tcg_out_vex_modrm_type(s, avx2_dup_insn[vece], r, 0, a, type);
957139c1837SPaolo Bonzini    } else {
958139c1837SPaolo Bonzini        switch (vece) {
959139c1837SPaolo Bonzini        case MO_8:
960139c1837SPaolo Bonzini            /* ??? With zero in a register, use PSHUFB.  */
961139c1837SPaolo Bonzini            tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a);
962139c1837SPaolo Bonzini            a = r;
963139c1837SPaolo Bonzini            /* FALLTHRU */
964139c1837SPaolo Bonzini        case MO_16:
965139c1837SPaolo Bonzini            tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a);
966139c1837SPaolo Bonzini            a = r;
967139c1837SPaolo Bonzini            /* FALLTHRU */
968139c1837SPaolo Bonzini        case MO_32:
969139c1837SPaolo Bonzini            tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
970139c1837SPaolo Bonzini            /* imm8 operand: all output lanes selected from input lane 0.  */
971139c1837SPaolo Bonzini            tcg_out8(s, 0);
972139c1837SPaolo Bonzini            break;
973139c1837SPaolo Bonzini        case MO_64:
974139c1837SPaolo Bonzini            tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a);
975139c1837SPaolo Bonzini            break;
976139c1837SPaolo Bonzini        default:
977139c1837SPaolo Bonzini            g_assert_not_reached();
978139c1837SPaolo Bonzini        }
979139c1837SPaolo Bonzini    }
980139c1837SPaolo Bonzini    return true;
981139c1837SPaolo Bonzini}
982139c1837SPaolo Bonzini
983139c1837SPaolo Bonzinistatic bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
984139c1837SPaolo Bonzini                             TCGReg r, TCGReg base, intptr_t offset)
985139c1837SPaolo Bonzini{
986139c1837SPaolo Bonzini    if (have_avx2) {
987139c1837SPaolo Bonzini        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
988139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
989139c1837SPaolo Bonzini                                 r, 0, base, offset);
990139c1837SPaolo Bonzini    } else {
991139c1837SPaolo Bonzini        switch (vece) {
992139c1837SPaolo Bonzini        case MO_64:
993139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset);
994139c1837SPaolo Bonzini            break;
995139c1837SPaolo Bonzini        case MO_32:
996139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
997139c1837SPaolo Bonzini            break;
998139c1837SPaolo Bonzini        case MO_16:
999139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
1000139c1837SPaolo Bonzini            tcg_out8(s, 0); /* imm8 */
1001139c1837SPaolo Bonzini            tcg_out_dup_vec(s, type, vece, r, r);
1002139c1837SPaolo Bonzini            break;
1003139c1837SPaolo Bonzini        case MO_8:
1004139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
1005139c1837SPaolo Bonzini            tcg_out8(s, 0); /* imm8 */
1006139c1837SPaolo Bonzini            tcg_out_dup_vec(s, type, vece, r, r);
1007139c1837SPaolo Bonzini            break;
1008139c1837SPaolo Bonzini        default:
1009139c1837SPaolo Bonzini            g_assert_not_reached();
1010139c1837SPaolo Bonzini        }
1011139c1837SPaolo Bonzini    }
1012139c1837SPaolo Bonzini    return true;
1013139c1837SPaolo Bonzini}
1014139c1837SPaolo Bonzini
10154e186175SRichard Hendersonstatic void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
10164e186175SRichard Henderson                             TCGReg ret, int64_t arg)
1017139c1837SPaolo Bonzini{
1018139c1837SPaolo Bonzini    int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
1019139c1837SPaolo Bonzini
1020139c1837SPaolo Bonzini    if (arg == 0) {
1021139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
1022139c1837SPaolo Bonzini        return;
1023139c1837SPaolo Bonzini    }
1024139c1837SPaolo Bonzini    if (arg == -1) {
1025139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
1026139c1837SPaolo Bonzini        return;
1027139c1837SPaolo Bonzini    }
1028139c1837SPaolo Bonzini
10294e186175SRichard Henderson    if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
10304e186175SRichard Henderson        if (have_avx2) {
10314e186175SRichard Henderson            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
10324e186175SRichard Henderson        } else {
10334e186175SRichard Henderson            tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
10344e186175SRichard Henderson        }
10354e186175SRichard Henderson        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
10364e186175SRichard Henderson    } else {
1037139c1837SPaolo Bonzini        if (type == TCG_TYPE_V64) {
1038139c1837SPaolo Bonzini            tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
1039139c1837SPaolo Bonzini        } else if (have_avx2) {
1040139c1837SPaolo Bonzini            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
1041139c1837SPaolo Bonzini        } else {
1042139c1837SPaolo Bonzini            tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
1043139c1837SPaolo Bonzini        }
10444e186175SRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
1045139c1837SPaolo Bonzini            new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
1046139c1837SPaolo Bonzini        } else {
10474e186175SRichard Henderson            new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
1048139c1837SPaolo Bonzini        }
1049139c1837SPaolo Bonzini    }
1050139c1837SPaolo Bonzini}
1051139c1837SPaolo Bonzini
10520a6a8bc8SRichard Hendersonstatic void tcg_out_movi_vec(TCGContext *s, TCGType type,
10530a6a8bc8SRichard Henderson                             TCGReg ret, tcg_target_long arg)
10540a6a8bc8SRichard Henderson{
10550a6a8bc8SRichard Henderson    if (arg == 0) {
10560a6a8bc8SRichard Henderson        tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
10570a6a8bc8SRichard Henderson        return;
10580a6a8bc8SRichard Henderson    }
10590a6a8bc8SRichard Henderson    if (arg == -1) {
10600a6a8bc8SRichard Henderson        tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret);
10610a6a8bc8SRichard Henderson        return;
10620a6a8bc8SRichard Henderson    }
10630a6a8bc8SRichard Henderson
10640a6a8bc8SRichard Henderson    int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
10650a6a8bc8SRichard Henderson    tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret);
10660a6a8bc8SRichard Henderson    if (TCG_TARGET_REG_BITS == 64) {
10670a6a8bc8SRichard Henderson        new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
10680a6a8bc8SRichard Henderson    } else {
10690a6a8bc8SRichard Henderson        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
10700a6a8bc8SRichard Henderson    }
10710a6a8bc8SRichard Henderson}
10720a6a8bc8SRichard Henderson
10730a6a8bc8SRichard Hendersonstatic void tcg_out_movi_int(TCGContext *s, TCGType type,
1074139c1837SPaolo Bonzini                             TCGReg ret, tcg_target_long arg)
1075139c1837SPaolo Bonzini{
1076139c1837SPaolo Bonzini    tcg_target_long diff;
1077139c1837SPaolo Bonzini
1078139c1837SPaolo Bonzini    if (arg == 0) {
1079139c1837SPaolo Bonzini        tgen_arithr(s, ARITH_XOR, ret, ret);
1080139c1837SPaolo Bonzini        return;
1081139c1837SPaolo Bonzini    }
1082139c1837SPaolo Bonzini    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
1083139c1837SPaolo Bonzini        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
1084139c1837SPaolo Bonzini        tcg_out32(s, arg);
1085139c1837SPaolo Bonzini        return;
1086139c1837SPaolo Bonzini    }
1087139c1837SPaolo Bonzini    if (arg == (int32_t)arg) {
1088139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
1089139c1837SPaolo Bonzini        tcg_out32(s, arg);
1090139c1837SPaolo Bonzini        return;
1091139c1837SPaolo Bonzini    }
1092139c1837SPaolo Bonzini
1093139c1837SPaolo Bonzini    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
1094705ed477SRichard Henderson    diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
1095139c1837SPaolo Bonzini    if (diff == (int32_t)diff) {
1096139c1837SPaolo Bonzini        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
1097139c1837SPaolo Bonzini        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
1098139c1837SPaolo Bonzini        tcg_out32(s, diff);
1099139c1837SPaolo Bonzini        return;
1100139c1837SPaolo Bonzini    }
1101139c1837SPaolo Bonzini
1102139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
1103139c1837SPaolo Bonzini    tcg_out64(s, arg);
1104139c1837SPaolo Bonzini}
1105139c1837SPaolo Bonzini
11060a6a8bc8SRichard Hendersonstatic void tcg_out_movi(TCGContext *s, TCGType type,
11070a6a8bc8SRichard Henderson                         TCGReg ret, tcg_target_long arg)
11080a6a8bc8SRichard Henderson{
11090a6a8bc8SRichard Henderson    switch (type) {
11100a6a8bc8SRichard Henderson    case TCG_TYPE_I32:
11110a6a8bc8SRichard Henderson#if TCG_TARGET_REG_BITS == 64
11120a6a8bc8SRichard Henderson    case TCG_TYPE_I64:
11130a6a8bc8SRichard Henderson#endif
11140a6a8bc8SRichard Henderson        if (ret < 16) {
11150a6a8bc8SRichard Henderson            tcg_out_movi_int(s, type, ret, arg);
11160a6a8bc8SRichard Henderson        } else {
11170a6a8bc8SRichard Henderson            tcg_out_movi_vec(s, type, ret, arg);
11180a6a8bc8SRichard Henderson        }
11190a6a8bc8SRichard Henderson        break;
11200a6a8bc8SRichard Henderson    default:
11210a6a8bc8SRichard Henderson        g_assert_not_reached();
11220a6a8bc8SRichard Henderson    }
11230a6a8bc8SRichard Henderson}
11240a6a8bc8SRichard Henderson
1125767c2503SRichard Hendersonstatic bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1126767c2503SRichard Henderson{
1127767c2503SRichard Henderson    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
1128767c2503SRichard Henderson    tcg_out_modrm(s, OPC_XCHG_EvGv + rexw, r1, r2);
1129767c2503SRichard Henderson    return true;
1130767c2503SRichard Henderson}
1131767c2503SRichard Henderson
11326a6d772eSRichard Hendersonstatic void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
11336a6d772eSRichard Henderson                             tcg_target_long imm)
11346a6d772eSRichard Henderson{
11356a6d772eSRichard Henderson    /* This function is only used for passing structs by reference. */
11367d9e1ee4SRichard Henderson    tcg_debug_assert(imm == (int32_t)imm);
113798899850SRichard Henderson    tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
11386a6d772eSRichard Henderson}
11396a6d772eSRichard Henderson
1140139c1837SPaolo Bonzinistatic inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
1141139c1837SPaolo Bonzini{
1142139c1837SPaolo Bonzini    if (val == (int8_t)val) {
1143139c1837SPaolo Bonzini        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
1144139c1837SPaolo Bonzini        tcg_out8(s, val);
1145139c1837SPaolo Bonzini    } else if (val == (int32_t)val) {
1146139c1837SPaolo Bonzini        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
1147139c1837SPaolo Bonzini        tcg_out32(s, val);
1148139c1837SPaolo Bonzini    } else {
1149732e89f4SRichard Henderson        g_assert_not_reached();
1150139c1837SPaolo Bonzini    }
1151139c1837SPaolo Bonzini}
1152139c1837SPaolo Bonzini
1153139c1837SPaolo Bonzinistatic inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1154139c1837SPaolo Bonzini{
1155139c1837SPaolo Bonzini    /* Given the strength of x86 memory ordering, we only need care for
1156139c1837SPaolo Bonzini       store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
1157139c1837SPaolo Bonzini       faster than "mfence", so don't bother with the sse insn.  */
1158139c1837SPaolo Bonzini    if (a0 & TCG_MO_ST_LD) {
1159139c1837SPaolo Bonzini        tcg_out8(s, 0xf0);
1160139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
1161139c1837SPaolo Bonzini        tcg_out8(s, 0);
1162139c1837SPaolo Bonzini    }
1163139c1837SPaolo Bonzini}
1164139c1837SPaolo Bonzini
1165139c1837SPaolo Bonzinistatic inline void tcg_out_push(TCGContext *s, int reg)
1166139c1837SPaolo Bonzini{
1167139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
1168139c1837SPaolo Bonzini}
1169139c1837SPaolo Bonzini
1170139c1837SPaolo Bonzinistatic inline void tcg_out_pop(TCGContext *s, int reg)
1171139c1837SPaolo Bonzini{
1172139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
1173139c1837SPaolo Bonzini}
1174139c1837SPaolo Bonzini
1175139c1837SPaolo Bonzinistatic void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1176139c1837SPaolo Bonzini                       TCGReg arg1, intptr_t arg2)
1177139c1837SPaolo Bonzini{
1178139c1837SPaolo Bonzini    switch (type) {
1179139c1837SPaolo Bonzini    case TCG_TYPE_I32:
1180139c1837SPaolo Bonzini        if (ret < 16) {
1181139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
1182139c1837SPaolo Bonzini        } else {
1183139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
1184139c1837SPaolo Bonzini        }
1185139c1837SPaolo Bonzini        break;
1186139c1837SPaolo Bonzini    case TCG_TYPE_I64:
1187139c1837SPaolo Bonzini        if (ret < 16) {
1188139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
1189139c1837SPaolo Bonzini            break;
1190139c1837SPaolo Bonzini        }
1191139c1837SPaolo Bonzini        /* FALLTHRU */
1192139c1837SPaolo Bonzini    case TCG_TYPE_V64:
1193139c1837SPaolo Bonzini        /* There is no instruction that can validate 8-byte alignment.  */
1194139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16);
1195139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
1196139c1837SPaolo Bonzini        break;
1197139c1837SPaolo Bonzini    case TCG_TYPE_V128:
1198139c1837SPaolo Bonzini        /*
1199139c1837SPaolo Bonzini         * The gvec infrastructure is asserts that v128 vector loads
1200139c1837SPaolo Bonzini         * and stores use a 16-byte aligned offset.  Validate that the
1201139c1837SPaolo Bonzini         * final pointer is aligned by using an insn that will SIGSEGV.
1202139c1837SPaolo Bonzini         */
1203139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16);
1204139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2);
1205139c1837SPaolo Bonzini        break;
1206139c1837SPaolo Bonzini    case TCG_TYPE_V256:
1207139c1837SPaolo Bonzini        /*
1208139c1837SPaolo Bonzini         * The gvec infrastructure only requires 16-byte alignment,
1209139c1837SPaolo Bonzini         * so here we must use an unaligned load.
1210139c1837SPaolo Bonzini         */
1211139c1837SPaolo Bonzini        tcg_debug_assert(ret >= 16);
1212139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
1213139c1837SPaolo Bonzini                                 ret, 0, arg1, arg2);
1214139c1837SPaolo Bonzini        break;
1215139c1837SPaolo Bonzini    default:
1216139c1837SPaolo Bonzini        g_assert_not_reached();
1217139c1837SPaolo Bonzini    }
1218139c1837SPaolo Bonzini}
1219139c1837SPaolo Bonzini
1220139c1837SPaolo Bonzinistatic void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1221139c1837SPaolo Bonzini                       TCGReg arg1, intptr_t arg2)
1222139c1837SPaolo Bonzini{
1223139c1837SPaolo Bonzini    switch (type) {
1224139c1837SPaolo Bonzini    case TCG_TYPE_I32:
1225139c1837SPaolo Bonzini        if (arg < 16) {
1226139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
1227139c1837SPaolo Bonzini        } else {
1228139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
1229139c1837SPaolo Bonzini        }
1230139c1837SPaolo Bonzini        break;
1231139c1837SPaolo Bonzini    case TCG_TYPE_I64:
1232139c1837SPaolo Bonzini        if (arg < 16) {
1233139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
1234139c1837SPaolo Bonzini            break;
1235139c1837SPaolo Bonzini        }
1236139c1837SPaolo Bonzini        /* FALLTHRU */
1237139c1837SPaolo Bonzini    case TCG_TYPE_V64:
1238139c1837SPaolo Bonzini        /* There is no instruction that can validate 8-byte alignment.  */
1239139c1837SPaolo Bonzini        tcg_debug_assert(arg >= 16);
1240139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
1241139c1837SPaolo Bonzini        break;
1242139c1837SPaolo Bonzini    case TCG_TYPE_V128:
1243139c1837SPaolo Bonzini        /*
1244139c1837SPaolo Bonzini         * The gvec infrastructure is asserts that v128 vector loads
1245139c1837SPaolo Bonzini         * and stores use a 16-byte aligned offset.  Validate that the
1246139c1837SPaolo Bonzini         * final pointer is aligned by using an insn that will SIGSEGV.
1247c4f4a00aSRichard Henderson         *
1248c4f4a00aSRichard Henderson         * This specific instance is also used by TCG_CALL_RET_BY_VEC,
1249c4f4a00aSRichard Henderson         * for _WIN64, which must have SSE2 but may not have AVX.
1250139c1837SPaolo Bonzini         */
1251139c1837SPaolo Bonzini        tcg_debug_assert(arg >= 16);
1252c4f4a00aSRichard Henderson        if (have_avx1) {
1253139c1837SPaolo Bonzini            tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
1254c4f4a00aSRichard Henderson        } else {
1255c4f4a00aSRichard Henderson            tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2);
1256c4f4a00aSRichard Henderson        }
1257139c1837SPaolo Bonzini        break;
1258139c1837SPaolo Bonzini    case TCG_TYPE_V256:
1259139c1837SPaolo Bonzini        /*
1260139c1837SPaolo Bonzini         * The gvec infrastructure only requires 16-byte alignment,
1261139c1837SPaolo Bonzini         * so here we must use an unaligned store.
1262139c1837SPaolo Bonzini         */
1263139c1837SPaolo Bonzini        tcg_debug_assert(arg >= 16);
1264139c1837SPaolo Bonzini        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
1265139c1837SPaolo Bonzini                                 arg, 0, arg1, arg2);
1266139c1837SPaolo Bonzini        break;
1267139c1837SPaolo Bonzini    default:
1268139c1837SPaolo Bonzini        g_assert_not_reached();
1269139c1837SPaolo Bonzini    }
1270139c1837SPaolo Bonzini}
1271139c1837SPaolo Bonzini
1272139c1837SPaolo Bonzinistatic bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1273139c1837SPaolo Bonzini                        TCGReg base, intptr_t ofs)
1274139c1837SPaolo Bonzini{
1275139c1837SPaolo Bonzini    int rexw = 0;
1276139c1837SPaolo Bonzini    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
1277139c1837SPaolo Bonzini        if (val != (int32_t)val) {
1278139c1837SPaolo Bonzini            return false;
1279139c1837SPaolo Bonzini        }
1280139c1837SPaolo Bonzini        rexw = P_REXW;
1281139c1837SPaolo Bonzini    } else if (type != TCG_TYPE_I32) {
1282139c1837SPaolo Bonzini        return false;
1283139c1837SPaolo Bonzini    }
1284139c1837SPaolo Bonzini    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
1285139c1837SPaolo Bonzini    tcg_out32(s, val);
1286139c1837SPaolo Bonzini    return true;
1287139c1837SPaolo Bonzini}
1288139c1837SPaolo Bonzini
1289139c1837SPaolo Bonzinistatic void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
1290139c1837SPaolo Bonzini{
1291139c1837SPaolo Bonzini    /* Propagate an opcode prefix, such as P_DATA16.  */
1292139c1837SPaolo Bonzini    int ext = subopc & ~0x7;
1293139c1837SPaolo Bonzini    subopc &= 0x7;
1294139c1837SPaolo Bonzini
1295139c1837SPaolo Bonzini    if (count == 1) {
1296139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
1297139c1837SPaolo Bonzini    } else {
1298139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
1299139c1837SPaolo Bonzini        tcg_out8(s, count);
1300139c1837SPaolo Bonzini    }
1301139c1837SPaolo Bonzini}
1302139c1837SPaolo Bonzini
1303139c1837SPaolo Bonzinistatic inline void tcg_out_bswap32(TCGContext *s, int reg)
1304139c1837SPaolo Bonzini{
1305139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
1306139c1837SPaolo Bonzini}
1307139c1837SPaolo Bonzini
1308139c1837SPaolo Bonzinistatic inline void tcg_out_rolw_8(TCGContext *s, int reg)
1309139c1837SPaolo Bonzini{
1310139c1837SPaolo Bonzini    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
1311139c1837SPaolo Bonzini}
1312139c1837SPaolo Bonzini
1313d0e66c89SRichard Hendersonstatic void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
1314139c1837SPaolo Bonzini{
1315139c1837SPaolo Bonzini    /* movzbl */
1316139c1837SPaolo Bonzini    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
1317139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
1318139c1837SPaolo Bonzini}
1319139c1837SPaolo Bonzini
1320678155b2SRichard Hendersonstatic void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1321139c1837SPaolo Bonzini{
1322678155b2SRichard Henderson    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
1323139c1837SPaolo Bonzini    /* movsbl */
1324139c1837SPaolo Bonzini    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
1325139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
1326139c1837SPaolo Bonzini}
1327139c1837SPaolo Bonzini
1328379afdffSRichard Hendersonstatic void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
1329139c1837SPaolo Bonzini{
1330139c1837SPaolo Bonzini    /* movzwl */
1331139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
1332139c1837SPaolo Bonzini}
1333139c1837SPaolo Bonzini
1334753e42eaSRichard Hendersonstatic void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1335139c1837SPaolo Bonzini{
1336753e42eaSRichard Henderson    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
1337139c1837SPaolo Bonzini    /* movsw[lq] */
1338139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
1339139c1837SPaolo Bonzini}
1340139c1837SPaolo Bonzini
13419ecf5f61SRichard Hendersonstatic void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1342139c1837SPaolo Bonzini{
1343139c1837SPaolo Bonzini    /* 32-bit mov zero extends.  */
1344139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
1345139c1837SPaolo Bonzini}
1346139c1837SPaolo Bonzini
134752bf3398SRichard Hendersonstatic void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1348139c1837SPaolo Bonzini{
134952bf3398SRichard Henderson    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1350139c1837SPaolo Bonzini    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
1351139c1837SPaolo Bonzini}
1352139c1837SPaolo Bonzini
13539c6aa274SRichard Hendersonstatic void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
13549c6aa274SRichard Henderson{
13559c6aa274SRichard Henderson    tcg_out_ext32s(s, dest, src);
13569c6aa274SRichard Henderson}
13579c6aa274SRichard Henderson
1358b9bfe000SRichard Hendersonstatic void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
1359b9bfe000SRichard Henderson{
1360b2485530SRichard Henderson    if (dest != src) {
1361b9bfe000SRichard Henderson        tcg_out_ext32u(s, dest, src);
1362b9bfe000SRichard Henderson    }
1363b2485530SRichard Henderson}
1364b9bfe000SRichard Henderson
1365b8b94ac6SRichard Hendersonstatic void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
1366b8b94ac6SRichard Henderson{
1367b8b94ac6SRichard Henderson    tcg_out_ext32u(s, dest, src);
1368b8b94ac6SRichard Henderson}
1369b8b94ac6SRichard Henderson
1370139c1837SPaolo Bonzinistatic inline void tcg_out_bswap64(TCGContext *s, int reg)
1371139c1837SPaolo Bonzini{
1372139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
1373139c1837SPaolo Bonzini}
1374139c1837SPaolo Bonzini
1375139c1837SPaolo Bonzinistatic void tgen_arithi(TCGContext *s, int c, int r0,
1376139c1837SPaolo Bonzini                        tcg_target_long val, int cf)
1377139c1837SPaolo Bonzini{
1378139c1837SPaolo Bonzini    int rexw = 0;
1379139c1837SPaolo Bonzini
1380139c1837SPaolo Bonzini    if (TCG_TARGET_REG_BITS == 64) {
1381139c1837SPaolo Bonzini        rexw = c & -8;
1382139c1837SPaolo Bonzini        c &= 7;
1383139c1837SPaolo Bonzini    }
1384139c1837SPaolo Bonzini
138564708db3SPaolo Bonzini    switch (c) {
138664708db3SPaolo Bonzini    case ARITH_ADD:
138764708db3SPaolo Bonzini    case ARITH_SUB:
138864708db3SPaolo Bonzini        if (!cf) {
138964708db3SPaolo Bonzini            /*
139064708db3SPaolo Bonzini             * ??? While INC is 2 bytes shorter than ADDL $1, they also induce
139164708db3SPaolo Bonzini             * partial flags update stalls on Pentium4 and are not recommended
139264708db3SPaolo Bonzini             * by current Intel optimization manuals.
139364708db3SPaolo Bonzini             */
139464708db3SPaolo Bonzini            if (val == 1 || val == -1) {
1395139c1837SPaolo Bonzini                int is_inc = (c == ARITH_ADD) ^ (val < 0);
1396139c1837SPaolo Bonzini                if (TCG_TARGET_REG_BITS == 64) {
139764708db3SPaolo Bonzini                    /*
139864708db3SPaolo Bonzini                     * The single-byte increment encodings are re-tasked
139964708db3SPaolo Bonzini                     * as the REX prefixes.  Use the MODRM encoding.
140064708db3SPaolo Bonzini                     */
1401139c1837SPaolo Bonzini                    tcg_out_modrm(s, OPC_GRP5 + rexw,
1402139c1837SPaolo Bonzini                                  (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
1403139c1837SPaolo Bonzini                } else {
1404139c1837SPaolo Bonzini                    tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
1405139c1837SPaolo Bonzini                }
1406139c1837SPaolo Bonzini                return;
1407139c1837SPaolo Bonzini            }
140864708db3SPaolo Bonzini            if (val == 128) {
140964708db3SPaolo Bonzini                /*
141064708db3SPaolo Bonzini                 * Facilitate using an 8-bit immediate.  Carry is inverted
141164708db3SPaolo Bonzini                 * by this transformation, so do it only if cf == 0.
141264708db3SPaolo Bonzini                 */
141364708db3SPaolo Bonzini                c ^= ARITH_ADD ^ ARITH_SUB;
141464708db3SPaolo Bonzini                val = -128;
141564708db3SPaolo Bonzini            }
141664708db3SPaolo Bonzini        }
141764708db3SPaolo Bonzini        break;
1418139c1837SPaolo Bonzini
141964708db3SPaolo Bonzini    case ARITH_AND:
1420139c1837SPaolo Bonzini        if (TCG_TARGET_REG_BITS == 64) {
1421139c1837SPaolo Bonzini            if (val == 0xffffffffu) {
1422139c1837SPaolo Bonzini                tcg_out_ext32u(s, r0, r0);
1423139c1837SPaolo Bonzini                return;
1424139c1837SPaolo Bonzini            }
1425139c1837SPaolo Bonzini            if (val == (uint32_t)val) {
1426139c1837SPaolo Bonzini                /* AND with no high bits set can use a 32-bit operation.  */
1427139c1837SPaolo Bonzini                rexw = 0;
1428139c1837SPaolo Bonzini            }
1429139c1837SPaolo Bonzini        }
1430139c1837SPaolo Bonzini        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
1431139c1837SPaolo Bonzini            tcg_out_ext8u(s, r0, r0);
1432139c1837SPaolo Bonzini            return;
1433139c1837SPaolo Bonzini        }
1434139c1837SPaolo Bonzini        if (val == 0xffffu) {
1435139c1837SPaolo Bonzini            tcg_out_ext16u(s, r0, r0);
1436139c1837SPaolo Bonzini            return;
1437139c1837SPaolo Bonzini        }
143864708db3SPaolo Bonzini        break;
1439afa37be4SPaolo Bonzini
1440afa37be4SPaolo Bonzini    case ARITH_OR:
1441afa37be4SPaolo Bonzini    case ARITH_XOR:
1442afa37be4SPaolo Bonzini        if (val >= 0x80 && val <= 0xff
1443afa37be4SPaolo Bonzini            && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
1444afa37be4SPaolo Bonzini            tcg_out_modrm(s, OPC_ARITH_EbIb + P_REXB_RM, c, r0);
1445afa37be4SPaolo Bonzini            tcg_out8(s, val);
1446afa37be4SPaolo Bonzini            return;
1447afa37be4SPaolo Bonzini        }
1448afa37be4SPaolo Bonzini        break;
1449139c1837SPaolo Bonzini    }
1450139c1837SPaolo Bonzini
1451139c1837SPaolo Bonzini    if (val == (int8_t)val) {
1452139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
1453139c1837SPaolo Bonzini        tcg_out8(s, val);
1454139c1837SPaolo Bonzini        return;
1455139c1837SPaolo Bonzini    }
1456139c1837SPaolo Bonzini    if (rexw == 0 || val == (int32_t)val) {
1457139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
1458139c1837SPaolo Bonzini        tcg_out32(s, val);
1459139c1837SPaolo Bonzini        return;
1460139c1837SPaolo Bonzini    }
1461139c1837SPaolo Bonzini
1462732e89f4SRichard Henderson    g_assert_not_reached();
1463139c1837SPaolo Bonzini}
1464139c1837SPaolo Bonzini
1465139c1837SPaolo Bonzinistatic void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
1466139c1837SPaolo Bonzini{
1467139c1837SPaolo Bonzini    if (val != 0) {
1468139c1837SPaolo Bonzini        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
1469139c1837SPaolo Bonzini    }
1470139c1837SPaolo Bonzini}
1471139c1837SPaolo Bonzini
14721a057554SRichard Henderson/* Set SMALL to force a short forward branch.  */
14731a057554SRichard Hendersonstatic void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
1474139c1837SPaolo Bonzini{
1475139c1837SPaolo Bonzini    int32_t val, val1;
1476139c1837SPaolo Bonzini
1477139c1837SPaolo Bonzini    if (l->has_value) {
1478139c1837SPaolo Bonzini        val = tcg_pcrel_diff(s, l->u.value_ptr);
1479139c1837SPaolo Bonzini        val1 = val - 2;
1480139c1837SPaolo Bonzini        if ((int8_t)val1 == val1) {
1481139c1837SPaolo Bonzini            if (opc == -1) {
1482139c1837SPaolo Bonzini                tcg_out8(s, OPC_JMP_short);
1483139c1837SPaolo Bonzini            } else {
1484139c1837SPaolo Bonzini                tcg_out8(s, OPC_JCC_short + opc);
1485139c1837SPaolo Bonzini            }
1486139c1837SPaolo Bonzini            tcg_out8(s, val1);
1487139c1837SPaolo Bonzini        } else {
14881a057554SRichard Henderson            tcg_debug_assert(!small);
1489139c1837SPaolo Bonzini            if (opc == -1) {
1490139c1837SPaolo Bonzini                tcg_out8(s, OPC_JMP_long);
1491139c1837SPaolo Bonzini                tcg_out32(s, val - 5);
1492139c1837SPaolo Bonzini            } else {
1493139c1837SPaolo Bonzini                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
1494139c1837SPaolo Bonzini                tcg_out32(s, val - 6);
1495139c1837SPaolo Bonzini            }
1496139c1837SPaolo Bonzini        }
1497139c1837SPaolo Bonzini    } else if (small) {
1498139c1837SPaolo Bonzini        if (opc == -1) {
1499139c1837SPaolo Bonzini            tcg_out8(s, OPC_JMP_short);
1500139c1837SPaolo Bonzini        } else {
1501139c1837SPaolo Bonzini            tcg_out8(s, OPC_JCC_short + opc);
1502139c1837SPaolo Bonzini        }
1503139c1837SPaolo Bonzini        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
1504139c1837SPaolo Bonzini        s->code_ptr += 1;
1505139c1837SPaolo Bonzini    } else {
1506139c1837SPaolo Bonzini        if (opc == -1) {
1507139c1837SPaolo Bonzini            tcg_out8(s, OPC_JMP_long);
1508139c1837SPaolo Bonzini        } else {
1509139c1837SPaolo Bonzini            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
1510139c1837SPaolo Bonzini        }
1511139c1837SPaolo Bonzini        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
1512139c1837SPaolo Bonzini        s->code_ptr += 4;
1513139c1837SPaolo Bonzini    }
1514139c1837SPaolo Bonzini}
1515139c1837SPaolo Bonzini
15166749d85bSRichard Hendersonstatic int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
15176749d85bSRichard Henderson                       TCGArg arg2, int const_arg2, int rexw)
1518139c1837SPaolo Bonzini{
1519d3d1c30cSRichard Henderson    int jz, js;
1520303214aaSRichard Henderson
1521303214aaSRichard Henderson    if (!is_tst_cond(cond)) {
1522303214aaSRichard Henderson        if (!const_arg2) {
1523303214aaSRichard Henderson            tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
1524303214aaSRichard Henderson        } else if (arg2 == 0) {
1525139c1837SPaolo Bonzini            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
1526139c1837SPaolo Bonzini        } else {
1527303214aaSRichard Henderson            tcg_debug_assert(!rexw || arg2 == (int32_t)arg2);
1528139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
1529139c1837SPaolo Bonzini        }
15306749d85bSRichard Henderson        return tcg_cond_to_jcc[cond];
1531139c1837SPaolo Bonzini    }
1532139c1837SPaolo Bonzini
1533303214aaSRichard Henderson    jz = tcg_cond_to_jcc[cond];
1534d3d1c30cSRichard Henderson    js = (cond == TCG_COND_TSTNE ? JCC_JS : JCC_JNS);
1535303214aaSRichard Henderson
1536303214aaSRichard Henderson    if (!const_arg2) {
1537303214aaSRichard Henderson        tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
1538303214aaSRichard Henderson        return jz;
1539303214aaSRichard Henderson    }
1540303214aaSRichard Henderson
1541303214aaSRichard Henderson    if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) {
1542d3d1c30cSRichard Henderson        if (arg2 == 0x80) {
1543d3d1c30cSRichard Henderson            tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
1544d3d1c30cSRichard Henderson            return js;
1545d3d1c30cSRichard Henderson        }
1546be1335dbSPaolo Bonzini        if (arg2 == 0xff) {
1547be1335dbSPaolo Bonzini            tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
1548be1335dbSPaolo Bonzini            return jz;
1549be1335dbSPaolo Bonzini        }
1550303214aaSRichard Henderson        tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1);
1551303214aaSRichard Henderson        tcg_out8(s, arg2);
1552303214aaSRichard Henderson        return jz;
1553303214aaSRichard Henderson    }
1554303214aaSRichard Henderson
1555303214aaSRichard Henderson    if ((arg2 & ~0xff00) == 0 && arg1 < 4) {
1556d3d1c30cSRichard Henderson        if (arg2 == 0x8000) {
1557d3d1c30cSRichard Henderson            tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
1558d3d1c30cSRichard Henderson            return js;
1559d3d1c30cSRichard Henderson        }
1560be1335dbSPaolo Bonzini        if (arg2 == 0xff00) {
1561be1335dbSPaolo Bonzini            tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
1562be1335dbSPaolo Bonzini            return jz;
1563be1335dbSPaolo Bonzini        }
1564303214aaSRichard Henderson        tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4);
1565303214aaSRichard Henderson        tcg_out8(s, arg2 >> 8);
1566303214aaSRichard Henderson        return jz;
1567303214aaSRichard Henderson    }
1568303214aaSRichard Henderson
1569be1335dbSPaolo Bonzini    if (arg2 == 0xffff) {
1570be1335dbSPaolo Bonzini        tcg_out_modrm(s, OPC_TESTL | P_DATA16, arg1, arg1);
1571be1335dbSPaolo Bonzini        return jz;
1572be1335dbSPaolo Bonzini    }
1573be1335dbSPaolo Bonzini    if (arg2 == 0xffffffffu) {
1574be1335dbSPaolo Bonzini        tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
1575be1335dbSPaolo Bonzini        return jz;
1576be1335dbSPaolo Bonzini    }
1577be1335dbSPaolo Bonzini
1578d3d1c30cSRichard Henderson    if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) {
1579d3d1c30cSRichard Henderson        int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE);
1580d3d1c30cSRichard Henderson        int sh = ctz64(arg2);
1581d3d1c30cSRichard Henderson
1582d3d1c30cSRichard Henderson        rexw = (sh & 32 ? P_REXW : 0);
1583d3d1c30cSRichard Henderson        if ((sh & 31) == 31) {
1584d3d1c30cSRichard Henderson            tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1);
1585d3d1c30cSRichard Henderson            return js;
1586d3d1c30cSRichard Henderson        } else {
1587d3d1c30cSRichard Henderson            tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1);
1588d3d1c30cSRichard Henderson            tcg_out8(s, sh);
1589d3d1c30cSRichard Henderson            return jc;
1590d3d1c30cSRichard Henderson        }
1591d3d1c30cSRichard Henderson    }
1592d3d1c30cSRichard Henderson
1593303214aaSRichard Henderson    if (rexw) {
1594303214aaSRichard Henderson        if (arg2 == (uint32_t)arg2) {
1595303214aaSRichard Henderson            rexw = 0;
1596303214aaSRichard Henderson        } else {
1597303214aaSRichard Henderson            tcg_debug_assert(arg2 == (int32_t)arg2);
1598303214aaSRichard Henderson        }
1599303214aaSRichard Henderson    }
1600303214aaSRichard Henderson    tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, arg1);
1601303214aaSRichard Henderson    tcg_out32(s, arg2);
1602303214aaSRichard Henderson    return jz;
1603303214aaSRichard Henderson}
1604303214aaSRichard Henderson
1605c359ce75SRichard Hendersonstatic void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond,
1606139c1837SPaolo Bonzini                           TCGArg arg1, TCGArg arg2, int const_arg2,
1607c359ce75SRichard Henderson                           TCGLabel *label, bool small)
1608139c1837SPaolo Bonzini{
16096749d85bSRichard Henderson    int jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
16106749d85bSRichard Henderson    tcg_out_jxx(s, jcc, label, small);
1611139c1837SPaolo Bonzini}
1612139c1837SPaolo Bonzini
1613c359ce75SRichard Henderson#if TCG_TARGET_REG_BITS == 32
1614139c1837SPaolo Bonzinistatic void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
1615c359ce75SRichard Henderson                            const int *const_args, bool small)
1616139c1837SPaolo Bonzini{
1617139c1837SPaolo Bonzini    TCGLabel *label_next = gen_new_label();
1618139c1837SPaolo Bonzini    TCGLabel *label_this = arg_label(args[5]);
1619303214aaSRichard Henderson    TCGCond cond = args[4];
1620139c1837SPaolo Bonzini
1621303214aaSRichard Henderson    switch (cond) {
1622139c1837SPaolo Bonzini    case TCG_COND_EQ:
1623303214aaSRichard Henderson    case TCG_COND_TSTEQ:
1624303214aaSRichard Henderson        tcg_out_brcond(s, 0, tcg_invert_cond(cond),
1625303214aaSRichard Henderson                       args[0], args[2], const_args[2], label_next, 1);
1626303214aaSRichard Henderson        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
1627139c1837SPaolo Bonzini                       label_this, small);
1628139c1837SPaolo Bonzini        break;
1629139c1837SPaolo Bonzini    case TCG_COND_NE:
1630303214aaSRichard Henderson    case TCG_COND_TSTNE:
1631303214aaSRichard Henderson        tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2],
1632139c1837SPaolo Bonzini                       label_this, small);
1633303214aaSRichard Henderson        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
1634139c1837SPaolo Bonzini                       label_this, small);
1635139c1837SPaolo Bonzini        break;
1636139c1837SPaolo Bonzini    case TCG_COND_LT:
1637c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3],
1638139c1837SPaolo Bonzini                       label_this, small);
1639139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1640c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2],
1641139c1837SPaolo Bonzini                       label_this, small);
1642139c1837SPaolo Bonzini        break;
1643139c1837SPaolo Bonzini    case TCG_COND_LE:
1644c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3],
1645139c1837SPaolo Bonzini                       label_this, small);
1646139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1647c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2],
1648139c1837SPaolo Bonzini                       label_this, small);
1649139c1837SPaolo Bonzini        break;
1650139c1837SPaolo Bonzini    case TCG_COND_GT:
1651c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3],
1652139c1837SPaolo Bonzini                       label_this, small);
1653139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1654c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2],
1655139c1837SPaolo Bonzini                       label_this, small);
1656139c1837SPaolo Bonzini        break;
1657139c1837SPaolo Bonzini    case TCG_COND_GE:
1658c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3],
1659139c1837SPaolo Bonzini                       label_this, small);
1660139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1661c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2],
1662139c1837SPaolo Bonzini                       label_this, small);
1663139c1837SPaolo Bonzini        break;
1664139c1837SPaolo Bonzini    case TCG_COND_LTU:
1665c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3],
1666139c1837SPaolo Bonzini                       label_this, small);
1667139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1668c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2],
1669139c1837SPaolo Bonzini                       label_this, small);
1670139c1837SPaolo Bonzini        break;
1671139c1837SPaolo Bonzini    case TCG_COND_LEU:
1672c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3],
1673139c1837SPaolo Bonzini                       label_this, small);
1674139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1675c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2],
1676139c1837SPaolo Bonzini                       label_this, small);
1677139c1837SPaolo Bonzini        break;
1678139c1837SPaolo Bonzini    case TCG_COND_GTU:
1679c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3],
1680139c1837SPaolo Bonzini                       label_this, small);
1681139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1682c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2],
1683139c1837SPaolo Bonzini                       label_this, small);
1684139c1837SPaolo Bonzini        break;
1685139c1837SPaolo Bonzini    case TCG_COND_GEU:
1686c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3],
1687139c1837SPaolo Bonzini                       label_this, small);
1688139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JNE, label_next, 1);
1689c359ce75SRichard Henderson        tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2],
1690139c1837SPaolo Bonzini                       label_this, small);
1691139c1837SPaolo Bonzini        break;
1692139c1837SPaolo Bonzini    default:
1693732e89f4SRichard Henderson        g_assert_not_reached();
1694139c1837SPaolo Bonzini    }
169592ab8e7dSRichard Henderson    tcg_out_label(s, label_next);
1696139c1837SPaolo Bonzini}
1697139c1837SPaolo Bonzini#endif
1698139c1837SPaolo Bonzini
16997ba99a1cSRichard Hendersonstatic void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
17007ba99a1cSRichard Henderson                            TCGArg dest, TCGArg arg1, TCGArg arg2,
170195bf306eSRichard Henderson                            int const_arg2, bool neg)
1702139c1837SPaolo Bonzini{
170319517b83SRichard Henderson    int cmp_rexw = rexw;
17046950f68bSRichard Henderson    bool inv = false;
170596658acaSRichard Henderson    bool cleared;
17066749d85bSRichard Henderson    int jcc;
17076950f68bSRichard Henderson
17086950f68bSRichard Henderson    switch (cond) {
17096950f68bSRichard Henderson    case TCG_COND_NE:
17106950f68bSRichard Henderson        inv = true;
17116950f68bSRichard Henderson        /* fall through */
17126950f68bSRichard Henderson    case TCG_COND_EQ:
17136950f68bSRichard Henderson        /* If arg2 is 0, convert to LTU/GEU vs 1. */
17146950f68bSRichard Henderson        if (const_arg2 && arg2 == 0) {
17156950f68bSRichard Henderson            arg2 = 1;
17166950f68bSRichard Henderson            goto do_ltu;
17176950f68bSRichard Henderson        }
17186950f68bSRichard Henderson        break;
17196950f68bSRichard Henderson
172019517b83SRichard Henderson    case TCG_COND_TSTNE:
172119517b83SRichard Henderson        inv = true;
172219517b83SRichard Henderson        /* fall through */
172319517b83SRichard Henderson    case TCG_COND_TSTEQ:
172419517b83SRichard Henderson        /* If arg2 is -1, convert to LTU/GEU vs 1. */
172519517b83SRichard Henderson        if (const_arg2 && arg2 == 0xffffffffu) {
172619517b83SRichard Henderson            arg2 = 1;
172719517b83SRichard Henderson            cmp_rexw = 0;
172819517b83SRichard Henderson            goto do_ltu;
172919517b83SRichard Henderson        }
173019517b83SRichard Henderson        break;
173119517b83SRichard Henderson
17326950f68bSRichard Henderson    case TCG_COND_LEU:
17336950f68bSRichard Henderson        inv = true;
17346950f68bSRichard Henderson        /* fall through */
17356950f68bSRichard Henderson    case TCG_COND_GTU:
17366950f68bSRichard Henderson        /* If arg2 is a register, swap for LTU/GEU. */
17376950f68bSRichard Henderson        if (!const_arg2) {
17386950f68bSRichard Henderson            TCGReg t = arg1;
17396950f68bSRichard Henderson            arg1 = arg2;
17406950f68bSRichard Henderson            arg2 = t;
17416950f68bSRichard Henderson            goto do_ltu;
17426950f68bSRichard Henderson        }
17436950f68bSRichard Henderson        break;
17446950f68bSRichard Henderson
17456950f68bSRichard Henderson    case TCG_COND_GEU:
17466950f68bSRichard Henderson        inv = true;
17476950f68bSRichard Henderson        /* fall through */
17486950f68bSRichard Henderson    case TCG_COND_LTU:
17496950f68bSRichard Henderson    do_ltu:
17506950f68bSRichard Henderson        /*
17516950f68bSRichard Henderson         * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU.
17526950f68bSRichard Henderson         * We can then use NEG or INC to produce the desired result.
17536950f68bSRichard Henderson         * This is always smaller than the SETCC expansion.
17546950f68bSRichard Henderson         */
175519517b83SRichard Henderson        tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw);
175695bf306eSRichard Henderson
175795bf306eSRichard Henderson        /* X - X - C = -C = (C ? -1 : 0) */
175895bf306eSRichard Henderson        tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
175995bf306eSRichard Henderson        if (inv && neg) {
176095bf306eSRichard Henderson            /* ~(C ? -1 : 0) = (C ? 0 : -1) */
176195bf306eSRichard Henderson            tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest);
176295bf306eSRichard Henderson        } else if (inv) {
176395bf306eSRichard Henderson            /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */
176495bf306eSRichard Henderson            tgen_arithi(s, ARITH_ADD, dest, 1, 0);
176595bf306eSRichard Henderson        } else if (!neg) {
176695bf306eSRichard Henderson            /* -(C ? -1 : 0) = (C ? 1 : 0) */
176795bf306eSRichard Henderson            tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, dest);
17686950f68bSRichard Henderson        }
17696950f68bSRichard Henderson        return;
17706950f68bSRichard Henderson
1771e91f015bSRichard Henderson    case TCG_COND_GE:
1772e91f015bSRichard Henderson        inv = true;
1773e91f015bSRichard Henderson        /* fall through */
1774e91f015bSRichard Henderson    case TCG_COND_LT:
1775e91f015bSRichard Henderson        /* If arg2 is 0, extract the sign bit. */
1776e91f015bSRichard Henderson        if (const_arg2 && arg2 == 0) {
1777e91f015bSRichard Henderson            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, dest, arg1);
1778e91f015bSRichard Henderson            if (inv) {
1779e91f015bSRichard Henderson                tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest);
1780e91f015bSRichard Henderson            }
178195bf306eSRichard Henderson            tcg_out_shifti(s, (neg ? SHIFT_SAR : SHIFT_SHR) + rexw,
178295bf306eSRichard Henderson                           dest, rexw ? 63 : 31);
1783e91f015bSRichard Henderson            return;
1784e91f015bSRichard Henderson        }
1785e91f015bSRichard Henderson        break;
1786e91f015bSRichard Henderson
17876950f68bSRichard Henderson    default:
17886950f68bSRichard Henderson        break;
17896950f68bSRichard Henderson    }
17906950f68bSRichard Henderson
179196658acaSRichard Henderson    /*
179296658acaSRichard Henderson     * If dest does not overlap the inputs, clearing it first is preferred.
179396658acaSRichard Henderson     * The XOR breaks any false dependency for the low-byte write to dest,
179496658acaSRichard Henderson     * and is also one byte smaller than MOVZBL.
179596658acaSRichard Henderson     */
179696658acaSRichard Henderson    cleared = false;
179796658acaSRichard Henderson    if (dest != arg1 && (const_arg2 || dest != arg2)) {
179896658acaSRichard Henderson        tgen_arithr(s, ARITH_XOR, dest, dest);
179996658acaSRichard Henderson        cleared = true;
180096658acaSRichard Henderson    }
180196658acaSRichard Henderson
180219517b83SRichard Henderson    jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw);
18036749d85bSRichard Henderson    tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
180496658acaSRichard Henderson
180596658acaSRichard Henderson    if (!cleared) {
1806139c1837SPaolo Bonzini        tcg_out_ext8u(s, dest, dest);
1807139c1837SPaolo Bonzini    }
180895bf306eSRichard Henderson    if (neg) {
180995bf306eSRichard Henderson        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, dest);
181095bf306eSRichard Henderson    }
181196658acaSRichard Henderson}
1812139c1837SPaolo Bonzini
18137ba99a1cSRichard Henderson#if TCG_TARGET_REG_BITS == 32
1814139c1837SPaolo Bonzinistatic void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1815139c1837SPaolo Bonzini                             const int *const_args)
1816139c1837SPaolo Bonzini{
1817139c1837SPaolo Bonzini    TCGArg new_args[6];
1818139c1837SPaolo Bonzini    TCGLabel *label_true, *label_over;
1819139c1837SPaolo Bonzini
1820139c1837SPaolo Bonzini    memcpy(new_args, args+1, 5*sizeof(TCGArg));
1821139c1837SPaolo Bonzini
1822139c1837SPaolo Bonzini    if (args[0] == args[1] || args[0] == args[2]
1823139c1837SPaolo Bonzini        || (!const_args[3] && args[0] == args[3])
1824139c1837SPaolo Bonzini        || (!const_args[4] && args[0] == args[4])) {
1825139c1837SPaolo Bonzini        /* When the destination overlaps with one of the argument
1826139c1837SPaolo Bonzini           registers, don't do anything tricky.  */
1827139c1837SPaolo Bonzini        label_true = gen_new_label();
1828139c1837SPaolo Bonzini        label_over = gen_new_label();
1829139c1837SPaolo Bonzini
1830139c1837SPaolo Bonzini        new_args[5] = label_arg(label_true);
1831139c1837SPaolo Bonzini        tcg_out_brcond2(s, new_args, const_args+1, 1);
1832139c1837SPaolo Bonzini
1833139c1837SPaolo Bonzini        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1834139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JMP, label_over, 1);
183592ab8e7dSRichard Henderson        tcg_out_label(s, label_true);
1836139c1837SPaolo Bonzini
1837139c1837SPaolo Bonzini        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
183892ab8e7dSRichard Henderson        tcg_out_label(s, label_over);
1839139c1837SPaolo Bonzini    } else {
1840139c1837SPaolo Bonzini        /* When the destination does not overlap one of the arguments,
1841139c1837SPaolo Bonzini           clear the destination first, jump if cond false, and emit an
1842139c1837SPaolo Bonzini           increment in the true case.  This results in smaller code.  */
1843139c1837SPaolo Bonzini
1844139c1837SPaolo Bonzini        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1845139c1837SPaolo Bonzini
1846139c1837SPaolo Bonzini        label_over = gen_new_label();
1847139c1837SPaolo Bonzini        new_args[4] = tcg_invert_cond(new_args[4]);
1848139c1837SPaolo Bonzini        new_args[5] = label_arg(label_over);
1849139c1837SPaolo Bonzini        tcg_out_brcond2(s, new_args, const_args+1, 1);
1850139c1837SPaolo Bonzini
1851139c1837SPaolo Bonzini        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
185292ab8e7dSRichard Henderson        tcg_out_label(s, label_over);
1853139c1837SPaolo Bonzini    }
1854139c1837SPaolo Bonzini}
1855139c1837SPaolo Bonzini#endif
1856139c1837SPaolo Bonzini
1857c95da56bSRichard Hendersonstatic void tcg_out_cmov(TCGContext *s, int jcc, int rexw,
1858139c1837SPaolo Bonzini                         TCGReg dest, TCGReg v1)
1859139c1837SPaolo Bonzini{
1860c95da56bSRichard Henderson    tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
1861139c1837SPaolo Bonzini}
1862139c1837SPaolo Bonzini
186378ddf0dcSRichard Hendersonstatic void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond,
186478ddf0dcSRichard Henderson                            TCGReg dest, TCGReg c1, TCGArg c2, int const_c2,
1865139c1837SPaolo Bonzini                            TCGReg v1)
1866139c1837SPaolo Bonzini{
18676749d85bSRichard Henderson    int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw);
18686749d85bSRichard Henderson    tcg_out_cmov(s, jcc, rexw, dest, v1);
1869139c1837SPaolo Bonzini}
1870139c1837SPaolo Bonzini
1871139c1837SPaolo Bonzinistatic void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1872139c1837SPaolo Bonzini                        TCGArg arg2, bool const_a2)
1873139c1837SPaolo Bonzini{
1874139c1837SPaolo Bonzini    if (have_bmi1) {
1875139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
1876139c1837SPaolo Bonzini        if (const_a2) {
1877139c1837SPaolo Bonzini            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1878139c1837SPaolo Bonzini        } else {
1879139c1837SPaolo Bonzini            tcg_debug_assert(dest != arg2);
1880c95da56bSRichard Henderson            tcg_out_cmov(s, JCC_JB, rexw, dest, arg2);
1881139c1837SPaolo Bonzini        }
1882139c1837SPaolo Bonzini    } else {
1883139c1837SPaolo Bonzini        tcg_debug_assert(dest != arg2);
1884139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
1885c95da56bSRichard Henderson        tcg_out_cmov(s, JCC_JE, rexw, dest, arg2);
1886139c1837SPaolo Bonzini    }
1887139c1837SPaolo Bonzini}
1888139c1837SPaolo Bonzini
1889139c1837SPaolo Bonzinistatic void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1890139c1837SPaolo Bonzini                        TCGArg arg2, bool const_a2)
1891139c1837SPaolo Bonzini{
1892139c1837SPaolo Bonzini    if (have_lzcnt) {
1893139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
1894139c1837SPaolo Bonzini        if (const_a2) {
1895139c1837SPaolo Bonzini            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1896139c1837SPaolo Bonzini        } else {
1897139c1837SPaolo Bonzini            tcg_debug_assert(dest != arg2);
1898c95da56bSRichard Henderson            tcg_out_cmov(s, JCC_JB, rexw, dest, arg2);
1899139c1837SPaolo Bonzini        }
1900139c1837SPaolo Bonzini    } else {
1901139c1837SPaolo Bonzini        tcg_debug_assert(!const_a2);
1902139c1837SPaolo Bonzini        tcg_debug_assert(dest != arg1);
1903139c1837SPaolo Bonzini        tcg_debug_assert(dest != arg2);
1904139c1837SPaolo Bonzini
1905139c1837SPaolo Bonzini        /* Recall that the output of BSR is the index not the count.  */
1906139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
1907139c1837SPaolo Bonzini        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
1908139c1837SPaolo Bonzini
1909139c1837SPaolo Bonzini        /* Since we have destroyed the flags from BSR, we have to re-test.  */
19106749d85bSRichard Henderson        int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw);
19116749d85bSRichard Henderson        tcg_out_cmov(s, jcc, rexw, dest, arg2);
1912139c1837SPaolo Bonzini    }
1913139c1837SPaolo Bonzini}
1914139c1837SPaolo Bonzini
19152be7d76bSRichard Hendersonstatic void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest)
1916139c1837SPaolo Bonzini{
1917139c1837SPaolo Bonzini    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1918139c1837SPaolo Bonzini
1919139c1837SPaolo Bonzini    if (disp == (int32_t)disp) {
1920139c1837SPaolo Bonzini        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1921139c1837SPaolo Bonzini        tcg_out32(s, disp);
1922139c1837SPaolo Bonzini    } else {
1923139c1837SPaolo Bonzini        /* rip-relative addressing into the constant pool.
1924139c1837SPaolo Bonzini           This is 6 + 8 = 14 bytes, as compared to using an
19257a21bee2SDaniel P. Berrangé           immediate load 10 + 6 = 16 bytes, plus we may
1926139c1837SPaolo Bonzini           be able to re-use the pool constant for more calls.  */
1927139c1837SPaolo Bonzini        tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
1928139c1837SPaolo Bonzini        tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
1929139c1837SPaolo Bonzini        new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
1930139c1837SPaolo Bonzini        tcg_out32(s, 0);
1931139c1837SPaolo Bonzini    }
1932139c1837SPaolo Bonzini}
1933139c1837SPaolo Bonzini
1934cee44b03SRichard Hendersonstatic void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1935cee44b03SRichard Henderson                         const TCGHelperInfo *info)
1936139c1837SPaolo Bonzini{
1937139c1837SPaolo Bonzini    tcg_out_branch(s, 1, dest);
1938c4f4a00aSRichard Henderson
1939c4f4a00aSRichard Henderson#ifndef _WIN32
1940c4f4a00aSRichard Henderson    if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
1941c4f4a00aSRichard Henderson        /*
1942c4f4a00aSRichard Henderson         * The sysv i386 abi for struct return places a reference as the
1943c4f4a00aSRichard Henderson         * first argument of the stack, and pops that argument with the
1944c4f4a00aSRichard Henderson         * return statement.  Since we want to retain the aligned stack
1945c4f4a00aSRichard Henderson         * pointer for the callee, we do not want to actually push that
1946c4f4a00aSRichard Henderson         * argument before the call but rely on the normal store to the
1947c4f4a00aSRichard Henderson         * stack slot.  But we do need to compensate for the pop in order
1948c4f4a00aSRichard Henderson         * to reset our correct stack pointer value.
1949c4f4a00aSRichard Henderson         * Pushing a garbage value back onto the stack is quickest.
1950c4f4a00aSRichard Henderson         */
1951c4f4a00aSRichard Henderson        tcg_out_push(s, TCG_REG_EAX);
1952c4f4a00aSRichard Henderson    }
1953c4f4a00aSRichard Henderson#endif
1954139c1837SPaolo Bonzini}
1955139c1837SPaolo Bonzini
1956705ed477SRichard Hendersonstatic void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)
1957139c1837SPaolo Bonzini{
1958139c1837SPaolo Bonzini    tcg_out_branch(s, 0, dest);
1959139c1837SPaolo Bonzini}
1960139c1837SPaolo Bonzini
1961139c1837SPaolo Bonzinistatic void tcg_out_nopn(TCGContext *s, int n)
1962139c1837SPaolo Bonzini{
1963139c1837SPaolo Bonzini    int i;
1964139c1837SPaolo Bonzini    /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1965139c1837SPaolo Bonzini     * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1966139c1837SPaolo Bonzini     * duplicate prefix, and all of the interesting recent cores can
1967139c1837SPaolo Bonzini     * decode and discard the duplicates in a single cycle.
1968139c1837SPaolo Bonzini     */
1969139c1837SPaolo Bonzini    tcg_debug_assert(n >= 1);
1970139c1837SPaolo Bonzini    for (i = 1; i < n; ++i) {
1971139c1837SPaolo Bonzini        tcg_out8(s, 0x66);
1972139c1837SPaolo Bonzini    }
1973139c1837SPaolo Bonzini    tcg_out8(s, 0x90);
1974139c1837SPaolo Bonzini}
1975139c1837SPaolo Bonzini
197661713c29SRichard Hendersontypedef struct {
197761713c29SRichard Henderson    TCGReg base;
197861713c29SRichard Henderson    int index;
197961713c29SRichard Henderson    int ofs;
198061713c29SRichard Henderson    int seg;
19811c5322d9SRichard Henderson    TCGAtomAlign aa;
198261713c29SRichard Henderson} HostAddress;
198361713c29SRichard Henderson
19847b880107SRichard Hendersonbool tcg_target_has_memory_bswap(MemOp memop)
19857b880107SRichard Henderson{
1986098d0fc1SRichard Henderson    TCGAtomAlign aa;
1987098d0fc1SRichard Henderson
1988098d0fc1SRichard Henderson    if (!have_movbe) {
1989098d0fc1SRichard Henderson        return false;
1990098d0fc1SRichard Henderson    }
1991098d0fc1SRichard Henderson    if ((memop & MO_SIZE) < MO_128) {
1992098d0fc1SRichard Henderson        return true;
1993098d0fc1SRichard Henderson    }
1994098d0fc1SRichard Henderson
1995098d0fc1SRichard Henderson    /*
1996098d0fc1SRichard Henderson     * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
1997098d0fc1SRichard Henderson     * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
1998098d0fc1SRichard Henderson     */
1999098d0fc1SRichard Henderson    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2000098d0fc1SRichard Henderson    return aa.atom < MO_128;
20017b880107SRichard Henderson}
20027b880107SRichard Henderson
/*
 * Because i686 has no register parameters and because x86_64 has xchg
 * to handle addr/data register overlap, we have placed all input arguments
 * before we might need a scratch reg.
 *
 * Even then, a scratch is only needed for l->raddr.  Rather than expose
 * a general-purpose scratch when we don't actually know it's available,
 * use the ra_gen hook to load into RAX if needed.
 */
2012da8ab70aSRichard Henderson#if TCG_TARGET_REG_BITS == 64
2013da8ab70aSRichard Hendersonstatic TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2014da8ab70aSRichard Henderson{
2015da8ab70aSRichard Henderson    if (arg < 0) {
2016da8ab70aSRichard Henderson        arg = TCG_REG_RAX;
2017da8ab70aSRichard Henderson    }
2018da8ab70aSRichard Henderson    tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
2019da8ab70aSRichard Henderson    return arg;
2020da8ab70aSRichard Henderson}
2021da8ab70aSRichard Hendersonstatic const TCGLdstHelperParam ldst_helper_param = {
2022da8ab70aSRichard Henderson    .ra_gen = ldst_ra_gen
2023da8ab70aSRichard Henderson};
2024da8ab70aSRichard Henderson#else
2025da8ab70aSRichard Hendersonstatic const TCGLdstHelperParam ldst_helper_param = { };
2026da8ab70aSRichard Henderson#endif
2027da8ab70aSRichard Henderson
2028098d0fc1SRichard Hendersonstatic void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
2029098d0fc1SRichard Henderson                                TCGReg l, TCGReg h, TCGReg v)
2030098d0fc1SRichard Henderson{
2031098d0fc1SRichard Henderson    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
2032098d0fc1SRichard Henderson
2033098d0fc1SRichard Henderson    /* vpmov{d,q} %v, %l */
2034098d0fc1SRichard Henderson    tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
2035098d0fc1SRichard Henderson    /* vpextr{d,q} $1, %v, %h */
2036098d0fc1SRichard Henderson    tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
2037098d0fc1SRichard Henderson    tcg_out8(s, 1);
2038098d0fc1SRichard Henderson}
2039098d0fc1SRichard Henderson
2040098d0fc1SRichard Hendersonstatic void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
2041098d0fc1SRichard Henderson                                TCGReg v, TCGReg l, TCGReg h)
2042098d0fc1SRichard Henderson{
2043098d0fc1SRichard Henderson    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
2044098d0fc1SRichard Henderson
2045098d0fc1SRichard Henderson    /* vmov{d,q} %l, %v */
2046098d0fc1SRichard Henderson    tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
2047098d0fc1SRichard Henderson    /* vpinsr{d,q} $1, %h, %v, %v */
2048098d0fc1SRichard Henderson    tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
2049098d0fc1SRichard Henderson    tcg_out8(s, 1);
2050098d0fc1SRichard Henderson}
2051098d0fc1SRichard Henderson
2052da8ab70aSRichard Henderson/*
2053139c1837SPaolo Bonzini * Generate code for the slow path for a load at the end of block
2054139c1837SPaolo Bonzini */
2055139c1837SPaolo Bonzinistatic bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2056139c1837SPaolo Bonzini{
2057da8ab70aSRichard Henderson    MemOp opc = get_memop(l->oi);
2058139c1837SPaolo Bonzini    tcg_insn_unit **label_ptr = &l->label_ptr[0];
2059139c1837SPaolo Bonzini
2060139c1837SPaolo Bonzini    /* resolve label address */
2061139c1837SPaolo Bonzini    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
206230cc7a7eSRichard Henderson    if (label_ptr[1]) {
2063139c1837SPaolo Bonzini        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2064139c1837SPaolo Bonzini    }
2065139c1837SPaolo Bonzini
2066da8ab70aSRichard Henderson    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
20670cadc1edSRichard Henderson    tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
2068da8ab70aSRichard Henderson    tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
2069139c1837SPaolo Bonzini
2070139c1837SPaolo Bonzini    tcg_out_jmp(s, l->raddr);
2071139c1837SPaolo Bonzini    return true;
2072139c1837SPaolo Bonzini}
2073139c1837SPaolo Bonzini
2074139c1837SPaolo Bonzini/*
2075139c1837SPaolo Bonzini * Generate code for the slow path for a store at the end of block
2076139c1837SPaolo Bonzini */
2077139c1837SPaolo Bonzinistatic bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2078139c1837SPaolo Bonzini{
20790036e54eSRichard Henderson    MemOp opc = get_memop(l->oi);
2080139c1837SPaolo Bonzini    tcg_insn_unit **label_ptr = &l->label_ptr[0];
2081139c1837SPaolo Bonzini
2082139c1837SPaolo Bonzini    /* resolve label address */
2083139c1837SPaolo Bonzini    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
208430cc7a7eSRichard Henderson    if (label_ptr[1]) {
2085139c1837SPaolo Bonzini        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2086139c1837SPaolo Bonzini    }
2087139c1837SPaolo Bonzini
20880036e54eSRichard Henderson    tcg_out_st_helper_args(s, l, &ldst_helper_param);
20890cadc1edSRichard Henderson    tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
2090139c1837SPaolo Bonzini
20910036e54eSRichard Henderson    tcg_out_jmp(s, l->raddr);
2092139c1837SPaolo Bonzini    return true;
2093139c1837SPaolo Bonzini}
2094b1ee3c67SRichard Henderson
2095915e1d52SRichard Henderson#ifdef CONFIG_USER_ONLY
209661713c29SRichard Hendersonstatic HostAddress x86_guest_base = {
209761713c29SRichard Henderson    .index = -1
209861713c29SRichard Henderson};
209961713c29SRichard Henderson
2100139c1837SPaolo Bonzini#if defined(__x86_64__) && defined(__linux__)
2101139c1837SPaolo Bonzini# include <asm/prctl.h>
2102139c1837SPaolo Bonzini# include <sys/prctl.h>
2103139c1837SPaolo Bonziniint arch_prctl(int code, unsigned long addr);
2104139c1837SPaolo Bonzinistatic inline int setup_guest_base_seg(void)
2105139c1837SPaolo Bonzini{
2106139c1837SPaolo Bonzini    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
2107139c1837SPaolo Bonzini        return P_GS;
2108139c1837SPaolo Bonzini    }
2109139c1837SPaolo Bonzini    return 0;
2110139c1837SPaolo Bonzini}
2111915e1d52SRichard Henderson#define setup_guest_base_seg  setup_guest_base_seg
211261713c29SRichard Henderson#elif defined(__x86_64__) && \
211361713c29SRichard Henderson      (defined (__FreeBSD__) || defined (__FreeBSD_kernel__))
2114139c1837SPaolo Bonzini# include <machine/sysarch.h>
2115139c1837SPaolo Bonzinistatic inline int setup_guest_base_seg(void)
2116139c1837SPaolo Bonzini{
2117139c1837SPaolo Bonzini    if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) {
2118139c1837SPaolo Bonzini        return P_GS;
2119139c1837SPaolo Bonzini    }
2120139c1837SPaolo Bonzini    return 0;
2121139c1837SPaolo Bonzini}
2122915e1d52SRichard Henderson#define setup_guest_base_seg  setup_guest_base_seg
2123915e1d52SRichard Henderson#endif
2124139c1837SPaolo Bonzini#else
2125915e1d52SRichard Henderson# define x86_guest_base (*(HostAddress *)({ qemu_build_not_reached(); NULL; }))
2126915e1d52SRichard Henderson#endif /* CONFIG_USER_ONLY */
2127915e1d52SRichard Henderson#ifndef setup_guest_base_seg
2128915e1d52SRichard Henderson# define setup_guest_base_seg()  0
2129915e1d52SRichard Henderson#endif
2130139c1837SPaolo Bonzini
2131d0a9bb5eSRichard Henderson#define MIN_TLB_MASK_TABLE_OFS  INT_MIN
2132d0a9bb5eSRichard Henderson
2133530074c6SRichard Henderson/*
2134530074c6SRichard Henderson * For softmmu, perform the TLB load and compare.
2135530074c6SRichard Henderson * For useronly, perform any required alignment tests.
2136530074c6SRichard Henderson * In both cases, return a TCGLabelQemuLdst structure if the slow path
2137530074c6SRichard Henderson * is required and fill in @h with the host address for the fast path.
2138530074c6SRichard Henderson */
2139530074c6SRichard Hendersonstatic TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2140530074c6SRichard Henderson                                           TCGReg addrlo, TCGReg addrhi,
2141530074c6SRichard Henderson                                           MemOpIdx oi, bool is_ld)
2142530074c6SRichard Henderson{
2143530074c6SRichard Henderson    TCGLabelQemuLdst *ldst = NULL;
2144530074c6SRichard Henderson    MemOp opc = get_memop(oi);
2145098d0fc1SRichard Henderson    MemOp s_bits = opc & MO_SIZE;
21461c5322d9SRichard Henderson    unsigned a_mask;
21471c5322d9SRichard Henderson
2148915e1d52SRichard Henderson    if (tcg_use_softmmu) {
21491c5322d9SRichard Henderson        h->index = TCG_REG_L0;
21501c5322d9SRichard Henderson        h->ofs = 0;
21511c5322d9SRichard Henderson        h->seg = 0;
2152915e1d52SRichard Henderson    } else {
21531c5322d9SRichard Henderson        *h = x86_guest_base;
2154915e1d52SRichard Henderson    }
21551c5322d9SRichard Henderson    h->base = addrlo;
2156098d0fc1SRichard Henderson    h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
21571c5322d9SRichard Henderson    a_mask = (1 << h->aa.align) - 1;
2158530074c6SRichard Henderson
2159915e1d52SRichard Henderson    if (tcg_use_softmmu) {
2160530074c6SRichard Henderson        int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
2161530074c6SRichard Henderson                            : offsetof(CPUTLBEntry, addr_write);
2162530074c6SRichard Henderson        TCGType ttype = TCG_TYPE_I32;
2163530074c6SRichard Henderson        TCGType tlbtype = TCG_TYPE_I32;
2164530074c6SRichard Henderson        int trexw = 0, hrexw = 0, tlbrexw = 0;
2165530074c6SRichard Henderson        unsigned mem_index = get_mmuidx(oi);
2166530074c6SRichard Henderson        unsigned s_mask = (1 << s_bits) - 1;
2167d0a9bb5eSRichard Henderson        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
2168c60ad6e3SRichard Henderson        int tlb_mask;
2169530074c6SRichard Henderson
2170530074c6SRichard Henderson        ldst = new_ldst_label(s);
2171530074c6SRichard Henderson        ldst->is_ld = is_ld;
2172530074c6SRichard Henderson        ldst->oi = oi;
2173530074c6SRichard Henderson        ldst->addrlo_reg = addrlo;
2174530074c6SRichard Henderson        ldst->addrhi_reg = addrhi;
2175530074c6SRichard Henderson
2176530074c6SRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
217763f4da91SRichard Henderson            ttype = s->addr_type;
217863f4da91SRichard Henderson            trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
2179530074c6SRichard Henderson            if (TCG_TYPE_PTR == TCG_TYPE_I64) {
2180530074c6SRichard Henderson                hrexw = P_REXW;
2181a66efde1SRichard Henderson                if (s->page_bits + s->tlb_dyn_max_bits > 32) {
2182530074c6SRichard Henderson                    tlbtype = TCG_TYPE_I64;
2183530074c6SRichard Henderson                    tlbrexw = P_REXW;
2184530074c6SRichard Henderson                }
2185530074c6SRichard Henderson            }
2186530074c6SRichard Henderson        }
2187530074c6SRichard Henderson
2188530074c6SRichard Henderson        tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
2189530074c6SRichard Henderson        tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
2190aece72b7SRichard Henderson                       s->page_bits - CPU_TLB_ENTRY_BITS);
2191530074c6SRichard Henderson
2192530074c6SRichard Henderson        tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
2193d0a9bb5eSRichard Henderson                             fast_ofs + offsetof(CPUTLBDescFast, mask));
2194530074c6SRichard Henderson
2195530074c6SRichard Henderson        tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
2196d0a9bb5eSRichard Henderson                             fast_ofs + offsetof(CPUTLBDescFast, table));
2197530074c6SRichard Henderson
2198530074c6SRichard Henderson        /*
2199915e1d52SRichard Henderson         * If the required alignment is at least as large as the access,
2200915e1d52SRichard Henderson         * simply copy the address and mask.  For lesser alignments,
2201915e1d52SRichard Henderson         * check that we don't cross pages for the complete access.
2202530074c6SRichard Henderson         */
22031c5322d9SRichard Henderson        if (a_mask >= s_mask) {
2204530074c6SRichard Henderson            tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
2205530074c6SRichard Henderson        } else {
2206530074c6SRichard Henderson            tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
2207530074c6SRichard Henderson                                 addrlo, s_mask - a_mask);
2208530074c6SRichard Henderson        }
2209aece72b7SRichard Henderson        tlb_mask = s->page_mask | a_mask;
2210530074c6SRichard Henderson        tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
2211530074c6SRichard Henderson
2212530074c6SRichard Henderson        /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
2213530074c6SRichard Henderson        tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
2214530074c6SRichard Henderson                             TCG_REG_L1, TCG_REG_L0, cmp_ofs);
2215530074c6SRichard Henderson
2216530074c6SRichard Henderson        /* jne slow_path */
2217530074c6SRichard Henderson        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
2218530074c6SRichard Henderson        ldst->label_ptr[0] = s->code_ptr;
2219530074c6SRichard Henderson        s->code_ptr += 4;
2220530074c6SRichard Henderson
222163f4da91SRichard Henderson        if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
2222530074c6SRichard Henderson            /* cmp 4(TCG_REG_L0), addrhi */
2223915e1d52SRichard Henderson            tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi,
2224915e1d52SRichard Henderson                                 TCG_REG_L0, cmp_ofs + 4);
2225530074c6SRichard Henderson
2226530074c6SRichard Henderson            /* jne slow_path */
2227530074c6SRichard Henderson            tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
2228530074c6SRichard Henderson            ldst->label_ptr[1] = s->code_ptr;
2229530074c6SRichard Henderson            s->code_ptr += 4;
2230530074c6SRichard Henderson        }
2231530074c6SRichard Henderson
2232530074c6SRichard Henderson        /* TLB Hit.  */
22331fac4648SRichard Henderson        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
2234530074c6SRichard Henderson                   offsetof(CPUTLBEntry, addend));
2235915e1d52SRichard Henderson    } else if (a_mask) {
2236303214aaSRichard Henderson        int jcc;
2237530074c6SRichard Henderson
2238303214aaSRichard Henderson        ldst = new_ldst_label(s);
2239530074c6SRichard Henderson        ldst->is_ld = is_ld;
2240530074c6SRichard Henderson        ldst->oi = oi;
2241530074c6SRichard Henderson        ldst->addrlo_reg = addrlo;
2242530074c6SRichard Henderson        ldst->addrhi_reg = addrhi;
2243530074c6SRichard Henderson
2244530074c6SRichard Henderson        /* jne slow_path */
2245303214aaSRichard Henderson        jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addrlo, a_mask, true, false);
2246303214aaSRichard Henderson        tcg_out_opc(s, OPC_JCC_long + jcc, 0, 0, 0);
2247530074c6SRichard Henderson        ldst->label_ptr[0] = s->code_ptr;
2248530074c6SRichard Henderson        s->code_ptr += 4;
2249530074c6SRichard Henderson    }
2250530074c6SRichard Henderson
2251530074c6SRichard Henderson    return ldst;
2252530074c6SRichard Henderson}
2253530074c6SRichard Henderson
2254139c1837SPaolo Bonzinistatic void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
225561713c29SRichard Henderson                                   HostAddress h, TCGType type, MemOp memop)
2256139c1837SPaolo Bonzini{
2257d2ef1b83SRichard Henderson    bool use_movbe = false;
2258bf12e224SRichard Henderson    int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
2259139c1837SPaolo Bonzini    int movop = OPC_MOVL_GvEv;
2260139c1837SPaolo Bonzini
2261d2ef1b83SRichard Henderson    /* Do big-endian loads with movbe.  */
2262d2ef1b83SRichard Henderson    if (memop & MO_BSWAP) {
2263d2ef1b83SRichard Henderson        tcg_debug_assert(have_movbe);
2264d2ef1b83SRichard Henderson        use_movbe = true;
2265139c1837SPaolo Bonzini        movop = OPC_MOVBE_GyMy;
2266139c1837SPaolo Bonzini    }
2267139c1837SPaolo Bonzini
2268139c1837SPaolo Bonzini    switch (memop & MO_SSIZE) {
2269139c1837SPaolo Bonzini    case MO_UB:
227061713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + h.seg, datalo,
227161713c29SRichard Henderson                                 h.base, h.index, 0, h.ofs);
2272139c1837SPaolo Bonzini        break;
2273139c1837SPaolo Bonzini    case MO_SB:
227461713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + h.seg, datalo,
227561713c29SRichard Henderson                                 h.base, h.index, 0, h.ofs);
2276139c1837SPaolo Bonzini        break;
2277139c1837SPaolo Bonzini    case MO_UW:
2278d2ef1b83SRichard Henderson        if (use_movbe) {
2279d2ef1b83SRichard Henderson            /* There is no extending movbe; only low 16-bits are modified.  */
228061713c29SRichard Henderson            if (datalo != h.base && datalo != h.index) {
2281d2ef1b83SRichard Henderson                /* XOR breaks dependency chains.  */
2282d2ef1b83SRichard Henderson                tgen_arithr(s, ARITH_XOR, datalo, datalo);
228361713c29SRichard Henderson                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
228461713c29SRichard Henderson                                         datalo, h.base, h.index, 0, h.ofs);
2285139c1837SPaolo Bonzini            } else {
228661713c29SRichard Henderson                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
228761713c29SRichard Henderson                                         datalo, h.base, h.index, 0, h.ofs);
2288d2ef1b83SRichard Henderson                tcg_out_ext16u(s, datalo, datalo);
2289d2ef1b83SRichard Henderson            }
2290d2ef1b83SRichard Henderson        } else {
229161713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_MOVZWL + h.seg, datalo,
229261713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
2293139c1837SPaolo Bonzini        }
2294d2ef1b83SRichard Henderson        break;
2295d2ef1b83SRichard Henderson    case MO_SW:
2296d2ef1b83SRichard Henderson        if (use_movbe) {
229761713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
229861713c29SRichard Henderson                                     datalo, h.base, h.index, 0, h.ofs);
2299753e42eaSRichard Henderson            tcg_out_ext16s(s, type, datalo, datalo);
2300139c1837SPaolo Bonzini        } else {
230161713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + h.seg,
230261713c29SRichard Henderson                                     datalo, h.base, h.index, 0, h.ofs);
2303139c1837SPaolo Bonzini        }
2304139c1837SPaolo Bonzini        break;
2305139c1837SPaolo Bonzini    case MO_UL:
230661713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
230761713c29SRichard Henderson                                 h.base, h.index, 0, h.ofs);
2308139c1837SPaolo Bonzini        break;
2309139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
2310139c1837SPaolo Bonzini    case MO_SL:
2311d2ef1b83SRichard Henderson        if (use_movbe) {
231261713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + h.seg, datalo,
231361713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
2314139c1837SPaolo Bonzini            tcg_out_ext32s(s, datalo, datalo);
2315139c1837SPaolo Bonzini        } else {
231661713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + h.seg, datalo,
231761713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
2318139c1837SPaolo Bonzini        }
2319139c1837SPaolo Bonzini        break;
2320139c1837SPaolo Bonzini#endif
2321fc313c64SFrédéric Pétrot    case MO_UQ:
2322139c1837SPaolo Bonzini        if (TCG_TARGET_REG_BITS == 64) {
232361713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
232461713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
23253174941fSRichard Henderson            break;
23263174941fSRichard Henderson        }
2327d2ef1b83SRichard Henderson        if (use_movbe) {
2328d2ef1b83SRichard Henderson            TCGReg t = datalo;
2329139c1837SPaolo Bonzini            datalo = datahi;
2330139c1837SPaolo Bonzini            datahi = t;
2331139c1837SPaolo Bonzini        }
233261713c29SRichard Henderson        if (h.base == datalo || h.index == datalo) {
233361713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
233461713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
233561713c29SRichard Henderson            tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
233661713c29SRichard Henderson            tcg_out_modrm_offset(s, movop + h.seg, datahi, datahi, 4);
2337139c1837SPaolo Bonzini        } else {
233861713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
233961713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
234061713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
234161713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs + 4);
2342139c1837SPaolo Bonzini        }
2343139c1837SPaolo Bonzini        break;
2344098d0fc1SRichard Henderson
2345098d0fc1SRichard Henderson    case MO_128:
2346098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2347098d0fc1SRichard Henderson
2348098d0fc1SRichard Henderson        /*
2349098d0fc1SRichard Henderson         * Without 16-byte atomicity, use integer regs.
2350098d0fc1SRichard Henderson         * That is where we want the data, and it allows bswaps.
2351098d0fc1SRichard Henderson         */
2352098d0fc1SRichard Henderson        if (h.aa.atom < MO_128) {
2353098d0fc1SRichard Henderson            if (use_movbe) {
2354098d0fc1SRichard Henderson                TCGReg t = datalo;
2355098d0fc1SRichard Henderson                datalo = datahi;
2356098d0fc1SRichard Henderson                datahi = t;
2357098d0fc1SRichard Henderson            }
2358098d0fc1SRichard Henderson            if (h.base == datalo || h.index == datalo) {
2359098d0fc1SRichard Henderson                tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
2360098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2361098d0fc1SRichard Henderson                tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
2362098d0fc1SRichard Henderson                                     datalo, datahi, 0);
2363098d0fc1SRichard Henderson                tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
2364098d0fc1SRichard Henderson                                     datahi, datahi, 8);
2365098d0fc1SRichard Henderson            } else {
2366098d0fc1SRichard Henderson                tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
2367098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2368098d0fc1SRichard Henderson                tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
2369098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs + 8);
2370098d0fc1SRichard Henderson            }
2371098d0fc1SRichard Henderson            break;
2372098d0fc1SRichard Henderson        }
2373098d0fc1SRichard Henderson
2374098d0fc1SRichard Henderson        /*
2375098d0fc1SRichard Henderson         * With 16-byte atomicity, a vector load is required.
2376098d0fc1SRichard Henderson         * If we already have 16-byte alignment, then VMOVDQA always works.
2377098d0fc1SRichard Henderson         * Else if VMOVDQU has atomicity with dynamic alignment, use that.
2378098d0fc1SRichard Henderson         * Else use we require a runtime test for alignment for VMOVDQA;
2379098d0fc1SRichard Henderson         * use VMOVDQU on the unaligned nonatomic path for simplicity.
2380098d0fc1SRichard Henderson         */
2381098d0fc1SRichard Henderson        if (h.aa.align >= MO_128) {
2382098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
2383098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2384098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2385098d0fc1SRichard Henderson        } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
2386098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
2387098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2388098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2389098d0fc1SRichard Henderson        } else {
2390098d0fc1SRichard Henderson            TCGLabel *l1 = gen_new_label();
2391098d0fc1SRichard Henderson            TCGLabel *l2 = gen_new_label();
2392303214aaSRichard Henderson            int jcc;
2393098d0fc1SRichard Henderson
2394303214aaSRichard Henderson            jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false);
2395303214aaSRichard Henderson            tcg_out_jxx(s, jcc, l1, true);
2396098d0fc1SRichard Henderson
2397098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
2398098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2399098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2400098d0fc1SRichard Henderson            tcg_out_jxx(s, JCC_JMP, l2, true);
2401098d0fc1SRichard Henderson
2402098d0fc1SRichard Henderson            tcg_out_label(s, l1);
2403098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
2404098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2405098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2406098d0fc1SRichard Henderson            tcg_out_label(s, l2);
2407098d0fc1SRichard Henderson        }
2408098d0fc1SRichard Henderson        tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC);
2409098d0fc1SRichard Henderson        break;
2410098d0fc1SRichard Henderson
2411139c1837SPaolo Bonzini    default:
2412d2ef1b83SRichard Henderson        g_assert_not_reached();
2413139c1837SPaolo Bonzini    }
2414139c1837SPaolo Bonzini}
2415139c1837SPaolo Bonzini
2416bf12e224SRichard Hendersonstatic void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2417bf12e224SRichard Henderson                            TCGReg addrlo, TCGReg addrhi,
2418bf12e224SRichard Henderson                            MemOpIdx oi, TCGType data_type)
2419139c1837SPaolo Bonzini{
2420530074c6SRichard Henderson    TCGLabelQemuLdst *ldst;
242161713c29SRichard Henderson    HostAddress h;
2422bf12e224SRichard Henderson
2423530074c6SRichard Henderson    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2424530074c6SRichard Henderson    tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
2425139c1837SPaolo Bonzini
2426530074c6SRichard Henderson    if (ldst) {
2427530074c6SRichard Henderson        ldst->type = data_type;
2428530074c6SRichard Henderson        ldst->datalo_reg = datalo;
2429530074c6SRichard Henderson        ldst->datahi_reg = datahi;
2430530074c6SRichard Henderson        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2431b1ee3c67SRichard Henderson    }
2432139c1837SPaolo Bonzini}
2433139c1837SPaolo Bonzini
2434139c1837SPaolo Bonzinistatic void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
243561713c29SRichard Henderson                                   HostAddress h, MemOp memop)
2436139c1837SPaolo Bonzini{
2437d2ef1b83SRichard Henderson    bool use_movbe = false;
2438139c1837SPaolo Bonzini    int movop = OPC_MOVL_EvGv;
2439139c1837SPaolo Bonzini
2440d2ef1b83SRichard Henderson    /*
24417893e42dSPhilippe Mathieu-Daudé     * Do big-endian stores with movbe or system-mode.
2442d2ef1b83SRichard Henderson     * User-only without movbe will have its swapping done generically.
2443d2ef1b83SRichard Henderson     */
2444d2ef1b83SRichard Henderson    if (memop & MO_BSWAP) {
2445d2ef1b83SRichard Henderson        tcg_debug_assert(have_movbe);
2446d2ef1b83SRichard Henderson        use_movbe = true;
2447139c1837SPaolo Bonzini        movop = OPC_MOVBE_MyGy;
2448139c1837SPaolo Bonzini    }
2449139c1837SPaolo Bonzini
2450139c1837SPaolo Bonzini    switch (memop & MO_SIZE) {
2451139c1837SPaolo Bonzini    case MO_8:
245207ce0b05SRichard Henderson        /* This is handled with constraints on INDEX_op_qemu_st8_i32. */
245307ce0b05SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
245461713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg,
245561713c29SRichard Henderson                                 datalo, h.base, h.index, 0, h.ofs);
2456139c1837SPaolo Bonzini        break;
2457139c1837SPaolo Bonzini    case MO_16:
245861713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, movop + P_DATA16 + h.seg, datalo,
245961713c29SRichard Henderson                                 h.base, h.index, 0, h.ofs);
2460139c1837SPaolo Bonzini        break;
2461139c1837SPaolo Bonzini    case MO_32:
246261713c29SRichard Henderson        tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
246361713c29SRichard Henderson                                 h.base, h.index, 0, h.ofs);
2464139c1837SPaolo Bonzini        break;
2465139c1837SPaolo Bonzini    case MO_64:
2466139c1837SPaolo Bonzini        if (TCG_TARGET_REG_BITS == 64) {
246761713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
246861713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
2469139c1837SPaolo Bonzini        } else {
2470d2ef1b83SRichard Henderson            if (use_movbe) {
2471d2ef1b83SRichard Henderson                TCGReg t = datalo;
2472139c1837SPaolo Bonzini                datalo = datahi;
2473139c1837SPaolo Bonzini                datahi = t;
2474139c1837SPaolo Bonzini            }
247561713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
247661713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs);
247761713c29SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
247861713c29SRichard Henderson                                     h.base, h.index, 0, h.ofs + 4);
2479139c1837SPaolo Bonzini        }
2480139c1837SPaolo Bonzini        break;
2481098d0fc1SRichard Henderson
2482098d0fc1SRichard Henderson    case MO_128:
2483098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2484098d0fc1SRichard Henderson
2485098d0fc1SRichard Henderson        /*
2486098d0fc1SRichard Henderson         * Without 16-byte atomicity, use integer regs.
2487098d0fc1SRichard Henderson         * That is where we have the data, and it allows bswaps.
2488098d0fc1SRichard Henderson         */
2489098d0fc1SRichard Henderson        if (h.aa.atom < MO_128) {
2490098d0fc1SRichard Henderson            if (use_movbe) {
2491098d0fc1SRichard Henderson                TCGReg t = datalo;
2492098d0fc1SRichard Henderson                datalo = datahi;
2493098d0fc1SRichard Henderson                datahi = t;
2494098d0fc1SRichard Henderson            }
2495098d0fc1SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
2496098d0fc1SRichard Henderson                                     h.base, h.index, 0, h.ofs);
2497098d0fc1SRichard Henderson            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
2498098d0fc1SRichard Henderson                                     h.base, h.index, 0, h.ofs + 8);
2499098d0fc1SRichard Henderson            break;
2500098d0fc1SRichard Henderson        }
2501098d0fc1SRichard Henderson
2502098d0fc1SRichard Henderson        /*
2503098d0fc1SRichard Henderson         * With 16-byte atomicity, a vector store is required.
2504098d0fc1SRichard Henderson         * If we already have 16-byte alignment, then VMOVDQA always works.
2505098d0fc1SRichard Henderson         * Else if VMOVDQU has atomicity with dynamic alignment, use that.
2506098d0fc1SRichard Henderson         * Else use we require a runtime test for alignment for VMOVDQA;
2507098d0fc1SRichard Henderson         * use VMOVDQU on the unaligned nonatomic path for simplicity.
2508098d0fc1SRichard Henderson         */
2509098d0fc1SRichard Henderson        tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
2510098d0fc1SRichard Henderson        if (h.aa.align >= MO_128) {
2511098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
2512098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2513098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2514098d0fc1SRichard Henderson        } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
2515098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
2516098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2517098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2518098d0fc1SRichard Henderson        } else {
2519098d0fc1SRichard Henderson            TCGLabel *l1 = gen_new_label();
2520098d0fc1SRichard Henderson            TCGLabel *l2 = gen_new_label();
2521303214aaSRichard Henderson            int jcc;
2522098d0fc1SRichard Henderson
2523303214aaSRichard Henderson            jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false);
2524303214aaSRichard Henderson            tcg_out_jxx(s, jcc, l1, true);
2525098d0fc1SRichard Henderson
2526098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
2527098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2528098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2529098d0fc1SRichard Henderson            tcg_out_jxx(s, JCC_JMP, l2, true);
2530098d0fc1SRichard Henderson
2531098d0fc1SRichard Henderson            tcg_out_label(s, l1);
2532098d0fc1SRichard Henderson            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
2533098d0fc1SRichard Henderson                                         TCG_TMP_VEC, 0,
2534098d0fc1SRichard Henderson                                         h.base, h.index, 0, h.ofs);
2535098d0fc1SRichard Henderson            tcg_out_label(s, l2);
2536098d0fc1SRichard Henderson        }
2537098d0fc1SRichard Henderson        break;
2538098d0fc1SRichard Henderson
2539139c1837SPaolo Bonzini    default:
2540d2ef1b83SRichard Henderson        g_assert_not_reached();
2541139c1837SPaolo Bonzini    }
2542139c1837SPaolo Bonzini}
2543139c1837SPaolo Bonzini
2544bf12e224SRichard Hendersonstatic void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2545bf12e224SRichard Henderson                            TCGReg addrlo, TCGReg addrhi,
2546bf12e224SRichard Henderson                            MemOpIdx oi, TCGType data_type)
2547139c1837SPaolo Bonzini{
2548530074c6SRichard Henderson    TCGLabelQemuLdst *ldst;
254961713c29SRichard Henderson    HostAddress h;
2550bf12e224SRichard Henderson
2551530074c6SRichard Henderson    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2552530074c6SRichard Henderson    tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
2553139c1837SPaolo Bonzini
2554530074c6SRichard Henderson    if (ldst) {
2555530074c6SRichard Henderson        ldst->type = data_type;
2556530074c6SRichard Henderson        ldst->datalo_reg = datalo;
2557530074c6SRichard Henderson        ldst->datahi_reg = datahi;
2558530074c6SRichard Henderson        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2559b1ee3c67SRichard Henderson    }
2560139c1837SPaolo Bonzini}
2561139c1837SPaolo Bonzini
2562b55a8d9dSRichard Hendersonstatic void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2563b55a8d9dSRichard Henderson{
2564b55a8d9dSRichard Henderson    /* Reuse the zeroing that exists for goto_ptr.  */
2565b55a8d9dSRichard Henderson    if (a0 == 0) {
2566b55a8d9dSRichard Henderson        tcg_out_jmp(s, tcg_code_gen_epilogue);
2567b55a8d9dSRichard Henderson    } else {
2568b55a8d9dSRichard Henderson        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
2569b55a8d9dSRichard Henderson        tcg_out_jmp(s, tb_ret_addr);
2570b55a8d9dSRichard Henderson    }
2571b55a8d9dSRichard Henderson}
2572b55a8d9dSRichard Henderson
2573cf7d6b8eSRichard Hendersonstatic void tcg_out_goto_tb(TCGContext *s, int which)
2574cf7d6b8eSRichard Henderson{
2575cf7d6b8eSRichard Henderson    /*
2576cf7d6b8eSRichard Henderson     * Jump displacement must be aligned for atomic patching;
2577cf7d6b8eSRichard Henderson     * see if we need to add extra nops before jump
2578cf7d6b8eSRichard Henderson     */
2579cf7d6b8eSRichard Henderson    int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr;
2580cf7d6b8eSRichard Henderson    if (gap != 1) {
2581cf7d6b8eSRichard Henderson        tcg_out_nopn(s, gap - 1);
2582cf7d6b8eSRichard Henderson    }
2583cf7d6b8eSRichard Henderson    tcg_out8(s, OPC_JMP_long); /* jmp im */
2584cf7d6b8eSRichard Henderson    set_jmp_insn_offset(s, which);
2585cf7d6b8eSRichard Henderson    tcg_out32(s, 0);
2586cf7d6b8eSRichard Henderson    set_jmp_reset_offset(s, which);
2587cf7d6b8eSRichard Henderson}
2588cf7d6b8eSRichard Henderson
25890fe1c98dSRichard Hendersonvoid tb_target_set_jmp_target(const TranslationBlock *tb, int n,
25900fe1c98dSRichard Henderson                              uintptr_t jmp_rx, uintptr_t jmp_rw)
25910fe1c98dSRichard Henderson{
25920fe1c98dSRichard Henderson    /* patch the branch destination */
25930fe1c98dSRichard Henderson    uintptr_t addr = tb->jmp_target_addr[n];
25940fe1c98dSRichard Henderson    qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
25950fe1c98dSRichard Henderson    /* no need to flush icache explicitly */
25960fe1c98dSRichard Henderson}
25970fe1c98dSRichard Henderson
2598139c1837SPaolo Bonzinistatic inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
25995e8892dbSMiroslav Rezanina                              const TCGArg args[TCG_MAX_OP_ARGS],
26005e8892dbSMiroslav Rezanina                              const int const_args[TCG_MAX_OP_ARGS])
2601139c1837SPaolo Bonzini{
2602139c1837SPaolo Bonzini    TCGArg a0, a1, a2;
2603139c1837SPaolo Bonzini    int c, const_a2, vexop, rexw = 0;
2604139c1837SPaolo Bonzini
2605139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
2606139c1837SPaolo Bonzini# define OP_32_64(x) \
2607139c1837SPaolo Bonzini        case glue(glue(INDEX_op_, x), _i64): \
2608139c1837SPaolo Bonzini            rexw = P_REXW; /* FALLTHRU */    \
2609139c1837SPaolo Bonzini        case glue(glue(INDEX_op_, x), _i32)
2610139c1837SPaolo Bonzini#else
2611139c1837SPaolo Bonzini# define OP_32_64(x) \
2612139c1837SPaolo Bonzini        case glue(glue(INDEX_op_, x), _i32)
2613139c1837SPaolo Bonzini#endif
2614139c1837SPaolo Bonzini
2615139c1837SPaolo Bonzini    /* Hoist the loads of the most common arguments.  */
2616139c1837SPaolo Bonzini    a0 = args[0];
2617139c1837SPaolo Bonzini    a1 = args[1];
2618139c1837SPaolo Bonzini    a2 = args[2];
2619139c1837SPaolo Bonzini    const_a2 = const_args[2];
2620139c1837SPaolo Bonzini
2621139c1837SPaolo Bonzini    switch (opc) {
2622139c1837SPaolo Bonzini    case INDEX_op_goto_ptr:
2623139c1837SPaolo Bonzini        /* jmp to the given host address (could be epilogue) */
2624139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
2625139c1837SPaolo Bonzini        break;
2626139c1837SPaolo Bonzini    case INDEX_op_br:
2627139c1837SPaolo Bonzini        tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
2628139c1837SPaolo Bonzini        break;
2629139c1837SPaolo Bonzini    OP_32_64(ld8u):
2630139c1837SPaolo Bonzini        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
2631139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
2632139c1837SPaolo Bonzini        break;
2633139c1837SPaolo Bonzini    OP_32_64(ld8s):
2634139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
2635139c1837SPaolo Bonzini        break;
2636139c1837SPaolo Bonzini    OP_32_64(ld16u):
2637139c1837SPaolo Bonzini        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
2638139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
2639139c1837SPaolo Bonzini        break;
2640139c1837SPaolo Bonzini    OP_32_64(ld16s):
2641139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
2642139c1837SPaolo Bonzini        break;
2643139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
2644139c1837SPaolo Bonzini    case INDEX_op_ld32u_i64:
2645139c1837SPaolo Bonzini#endif
2646139c1837SPaolo Bonzini    case INDEX_op_ld_i32:
2647139c1837SPaolo Bonzini        tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
2648139c1837SPaolo Bonzini        break;
2649139c1837SPaolo Bonzini
2650139c1837SPaolo Bonzini    OP_32_64(st8):
2651139c1837SPaolo Bonzini        if (const_args[0]) {
2652139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
2653139c1837SPaolo Bonzini            tcg_out8(s, a0);
2654139c1837SPaolo Bonzini        } else {
2655139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
2656139c1837SPaolo Bonzini        }
2657139c1837SPaolo Bonzini        break;
2658139c1837SPaolo Bonzini    OP_32_64(st16):
2659139c1837SPaolo Bonzini        if (const_args[0]) {
2660139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
2661139c1837SPaolo Bonzini            tcg_out16(s, a0);
2662139c1837SPaolo Bonzini        } else {
2663139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
2664139c1837SPaolo Bonzini        }
2665139c1837SPaolo Bonzini        break;
2666139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
2667139c1837SPaolo Bonzini    case INDEX_op_st32_i64:
2668139c1837SPaolo Bonzini#endif
2669139c1837SPaolo Bonzini    case INDEX_op_st_i32:
2670139c1837SPaolo Bonzini        if (const_args[0]) {
2671139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
2672139c1837SPaolo Bonzini            tcg_out32(s, a0);
2673139c1837SPaolo Bonzini        } else {
2674139c1837SPaolo Bonzini            tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
2675139c1837SPaolo Bonzini        }
2676139c1837SPaolo Bonzini        break;
2677139c1837SPaolo Bonzini
2678139c1837SPaolo Bonzini    OP_32_64(add):
2679139c1837SPaolo Bonzini        /* For 3-operand addition, use LEA.  */
2680139c1837SPaolo Bonzini        if (a0 != a1) {
2681139c1837SPaolo Bonzini            TCGArg c3 = 0;
2682139c1837SPaolo Bonzini            if (const_a2) {
2683139c1837SPaolo Bonzini                c3 = a2, a2 = -1;
2684139c1837SPaolo Bonzini            } else if (a0 == a2) {
2685139c1837SPaolo Bonzini                /* Watch out for dest = src + dest, since we've removed
2686139c1837SPaolo Bonzini                   the matching constraint on the add.  */
2687139c1837SPaolo Bonzini                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
2688139c1837SPaolo Bonzini                break;
2689139c1837SPaolo Bonzini            }
2690139c1837SPaolo Bonzini
2691139c1837SPaolo Bonzini            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
2692139c1837SPaolo Bonzini            break;
2693139c1837SPaolo Bonzini        }
2694139c1837SPaolo Bonzini        c = ARITH_ADD;
2695139c1837SPaolo Bonzini        goto gen_arith;
2696139c1837SPaolo Bonzini    OP_32_64(sub):
2697139c1837SPaolo Bonzini        c = ARITH_SUB;
2698139c1837SPaolo Bonzini        goto gen_arith;
2699139c1837SPaolo Bonzini    OP_32_64(and):
2700139c1837SPaolo Bonzini        c = ARITH_AND;
2701139c1837SPaolo Bonzini        goto gen_arith;
2702139c1837SPaolo Bonzini    OP_32_64(or):
2703139c1837SPaolo Bonzini        c = ARITH_OR;
2704139c1837SPaolo Bonzini        goto gen_arith;
2705139c1837SPaolo Bonzini    OP_32_64(xor):
2706139c1837SPaolo Bonzini        c = ARITH_XOR;
2707139c1837SPaolo Bonzini        goto gen_arith;
2708139c1837SPaolo Bonzini    gen_arith:
2709139c1837SPaolo Bonzini        if (const_a2) {
2710139c1837SPaolo Bonzini            tgen_arithi(s, c + rexw, a0, a2, 0);
2711139c1837SPaolo Bonzini        } else {
2712139c1837SPaolo Bonzini            tgen_arithr(s, c + rexw, a0, a2);
2713139c1837SPaolo Bonzini        }
2714139c1837SPaolo Bonzini        break;
2715139c1837SPaolo Bonzini
2716139c1837SPaolo Bonzini    OP_32_64(andc):
2717139c1837SPaolo Bonzini        if (const_a2) {
2718139c1837SPaolo Bonzini            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2719139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
2720139c1837SPaolo Bonzini        } else {
2721139c1837SPaolo Bonzini            tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
2722139c1837SPaolo Bonzini        }
2723139c1837SPaolo Bonzini        break;
2724139c1837SPaolo Bonzini
2725139c1837SPaolo Bonzini    OP_32_64(mul):
2726139c1837SPaolo Bonzini        if (const_a2) {
2727139c1837SPaolo Bonzini            int32_t val;
2728139c1837SPaolo Bonzini            val = a2;
2729139c1837SPaolo Bonzini            if (val == (int8_t)val) {
2730139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
2731139c1837SPaolo Bonzini                tcg_out8(s, val);
2732139c1837SPaolo Bonzini            } else {
2733139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
2734139c1837SPaolo Bonzini                tcg_out32(s, val);
2735139c1837SPaolo Bonzini            }
2736139c1837SPaolo Bonzini        } else {
2737139c1837SPaolo Bonzini            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
2738139c1837SPaolo Bonzini        }
2739139c1837SPaolo Bonzini        break;
2740139c1837SPaolo Bonzini
2741139c1837SPaolo Bonzini    OP_32_64(div2):
2742139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
2743139c1837SPaolo Bonzini        break;
2744139c1837SPaolo Bonzini    OP_32_64(divu2):
2745139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
2746139c1837SPaolo Bonzini        break;
2747139c1837SPaolo Bonzini
2748139c1837SPaolo Bonzini    OP_32_64(shl):
2749139c1837SPaolo Bonzini        /* For small constant 3-operand shift, use LEA.  */
2750139c1837SPaolo Bonzini        if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2751139c1837SPaolo Bonzini            if (a2 - 1 == 0) {
2752139c1837SPaolo Bonzini                /* shl $1,a1,a0 -> lea (a1,a1),a0 */
2753139c1837SPaolo Bonzini                tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2754139c1837SPaolo Bonzini            } else {
2755139c1837SPaolo Bonzini                /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
2756139c1837SPaolo Bonzini                tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2757139c1837SPaolo Bonzini            }
2758139c1837SPaolo Bonzini            break;
2759139c1837SPaolo Bonzini        }
2760139c1837SPaolo Bonzini        c = SHIFT_SHL;
2761139c1837SPaolo Bonzini        vexop = OPC_SHLX;
2762139c1837SPaolo Bonzini        goto gen_shift_maybe_vex;
2763139c1837SPaolo Bonzini    OP_32_64(shr):
2764139c1837SPaolo Bonzini        c = SHIFT_SHR;
2765139c1837SPaolo Bonzini        vexop = OPC_SHRX;
2766139c1837SPaolo Bonzini        goto gen_shift_maybe_vex;
2767139c1837SPaolo Bonzini    OP_32_64(sar):
2768139c1837SPaolo Bonzini        c = SHIFT_SAR;
2769139c1837SPaolo Bonzini        vexop = OPC_SARX;
2770139c1837SPaolo Bonzini        goto gen_shift_maybe_vex;
2771139c1837SPaolo Bonzini    OP_32_64(rotl):
2772139c1837SPaolo Bonzini        c = SHIFT_ROL;
2773139c1837SPaolo Bonzini        goto gen_shift;
2774139c1837SPaolo Bonzini    OP_32_64(rotr):
2775139c1837SPaolo Bonzini        c = SHIFT_ROR;
2776139c1837SPaolo Bonzini        goto gen_shift;
2777139c1837SPaolo Bonzini    gen_shift_maybe_vex:
2778139c1837SPaolo Bonzini        if (have_bmi2) {
2779139c1837SPaolo Bonzini            if (!const_a2) {
2780139c1837SPaolo Bonzini                tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2781139c1837SPaolo Bonzini                break;
2782139c1837SPaolo Bonzini            }
2783139c1837SPaolo Bonzini            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2784139c1837SPaolo Bonzini        }
2785139c1837SPaolo Bonzini        /* FALLTHRU */
2786139c1837SPaolo Bonzini    gen_shift:
2787139c1837SPaolo Bonzini        if (const_a2) {
2788139c1837SPaolo Bonzini            tcg_out_shifti(s, c + rexw, a0, a2);
2789139c1837SPaolo Bonzini        } else {
2790139c1837SPaolo Bonzini            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
2791139c1837SPaolo Bonzini        }
2792139c1837SPaolo Bonzini        break;
2793139c1837SPaolo Bonzini
2794139c1837SPaolo Bonzini    OP_32_64(ctz):
2795139c1837SPaolo Bonzini        tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2796139c1837SPaolo Bonzini        break;
2797139c1837SPaolo Bonzini    OP_32_64(clz):
2798139c1837SPaolo Bonzini        tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2799139c1837SPaolo Bonzini        break;
2800139c1837SPaolo Bonzini    OP_32_64(ctpop):
2801139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2802139c1837SPaolo Bonzini        break;
2803139c1837SPaolo Bonzini
2804c359ce75SRichard Henderson    OP_32_64(brcond):
2805c359ce75SRichard Henderson        tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1],
2806c359ce75SRichard Henderson                       arg_label(args[3]), 0);
2807139c1837SPaolo Bonzini        break;
28087ba99a1cSRichard Henderson    OP_32_64(setcond):
280995bf306eSRichard Henderson        tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, false);
281095bf306eSRichard Henderson        break;
281195bf306eSRichard Henderson    OP_32_64(negsetcond):
281295bf306eSRichard Henderson        tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, true);
2813139c1837SPaolo Bonzini        break;
281478ddf0dcSRichard Henderson    OP_32_64(movcond):
281578ddf0dcSRichard Henderson        tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]);
2816139c1837SPaolo Bonzini        break;
2817139c1837SPaolo Bonzini
2818139c1837SPaolo Bonzini    OP_32_64(bswap16):
28197335a3d6SRichard Henderson        if (a2 & TCG_BSWAP_OS) {
28207335a3d6SRichard Henderson            /* Output must be sign-extended. */
28217335a3d6SRichard Henderson            if (rexw) {
28227335a3d6SRichard Henderson                tcg_out_bswap64(s, a0);
28237335a3d6SRichard Henderson                tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48);
28247335a3d6SRichard Henderson            } else {
28257335a3d6SRichard Henderson                tcg_out_bswap32(s, a0);
28267335a3d6SRichard Henderson                tcg_out_shifti(s, SHIFT_SAR, a0, 16);
28277335a3d6SRichard Henderson            }
28287335a3d6SRichard Henderson        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
28297335a3d6SRichard Henderson            /* Output must be zero-extended, but input isn't. */
28307335a3d6SRichard Henderson            tcg_out_bswap32(s, a0);
28317335a3d6SRichard Henderson            tcg_out_shifti(s, SHIFT_SHR, a0, 16);
28327335a3d6SRichard Henderson        } else {
2833139c1837SPaolo Bonzini            tcg_out_rolw_8(s, a0);
28347335a3d6SRichard Henderson        }
2835139c1837SPaolo Bonzini        break;
2836139c1837SPaolo Bonzini    OP_32_64(bswap32):
2837139c1837SPaolo Bonzini        tcg_out_bswap32(s, a0);
28387335a3d6SRichard Henderson        if (rexw && (a2 & TCG_BSWAP_OS)) {
28397335a3d6SRichard Henderson            tcg_out_ext32s(s, a0, a0);
28407335a3d6SRichard Henderson        }
2841139c1837SPaolo Bonzini        break;
2842139c1837SPaolo Bonzini
2843139c1837SPaolo Bonzini    OP_32_64(neg):
2844139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
2845139c1837SPaolo Bonzini        break;
2846139c1837SPaolo Bonzini    OP_32_64(not):
2847139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
2848139c1837SPaolo Bonzini        break;
2849139c1837SPaolo Bonzini
2850fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a64_i32:
2851fecccfccSRichard Henderson        if (TCG_TARGET_REG_BITS == 32) {
2852bf12e224SRichard Henderson            tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
2853139c1837SPaolo Bonzini            break;
2854fecccfccSRichard Henderson        }
2855fecccfccSRichard Henderson        /* fall through */
2856fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a32_i32:
2857fecccfccSRichard Henderson        tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
2858fecccfccSRichard Henderson        break;
2859fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a32_i64:
2860bf12e224SRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
2861bf12e224SRichard Henderson            tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
2862fecccfccSRichard Henderson        } else {
2863bf12e224SRichard Henderson            tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
2864fecccfccSRichard Henderson        }
2865fecccfccSRichard Henderson        break;
2866fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a64_i64:
2867fecccfccSRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
2868fecccfccSRichard Henderson            tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
2869bf12e224SRichard Henderson        } else {
2870bf12e224SRichard Henderson            tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
2871bf12e224SRichard Henderson        }
2872139c1837SPaolo Bonzini        break;
2873098d0fc1SRichard Henderson    case INDEX_op_qemu_ld_a32_i128:
2874098d0fc1SRichard Henderson    case INDEX_op_qemu_ld_a64_i128:
2875098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2876098d0fc1SRichard Henderson        tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
2877098d0fc1SRichard Henderson        break;
2878fecccfccSRichard Henderson
2879fecccfccSRichard Henderson    case INDEX_op_qemu_st_a64_i32:
2880fecccfccSRichard Henderson    case INDEX_op_qemu_st8_a64_i32:
2881fecccfccSRichard Henderson        if (TCG_TARGET_REG_BITS == 32) {
2882bf12e224SRichard Henderson            tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
2883139c1837SPaolo Bonzini            break;
2884fecccfccSRichard Henderson        }
2885fecccfccSRichard Henderson        /* fall through */
2886fecccfccSRichard Henderson    case INDEX_op_qemu_st_a32_i32:
2887fecccfccSRichard Henderson    case INDEX_op_qemu_st8_a32_i32:
2888fecccfccSRichard Henderson        tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
2889fecccfccSRichard Henderson        break;
2890fecccfccSRichard Henderson    case INDEX_op_qemu_st_a32_i64:
2891bf12e224SRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
2892bf12e224SRichard Henderson            tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
2893fecccfccSRichard Henderson        } else {
2894bf12e224SRichard Henderson            tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
2895fecccfccSRichard Henderson        }
2896fecccfccSRichard Henderson        break;
2897fecccfccSRichard Henderson    case INDEX_op_qemu_st_a64_i64:
2898fecccfccSRichard Henderson        if (TCG_TARGET_REG_BITS == 64) {
2899fecccfccSRichard Henderson            tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
2900bf12e224SRichard Henderson        } else {
2901bf12e224SRichard Henderson            tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
2902bf12e224SRichard Henderson        }
2903139c1837SPaolo Bonzini        break;
2904098d0fc1SRichard Henderson    case INDEX_op_qemu_st_a32_i128:
2905098d0fc1SRichard Henderson    case INDEX_op_qemu_st_a64_i128:
2906098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2907098d0fc1SRichard Henderson        tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
2908098d0fc1SRichard Henderson        break;
2909139c1837SPaolo Bonzini
2910139c1837SPaolo Bonzini    OP_32_64(mulu2):
2911139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2912139c1837SPaolo Bonzini        break;
2913139c1837SPaolo Bonzini    OP_32_64(muls2):
2914139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2915139c1837SPaolo Bonzini        break;
2916139c1837SPaolo Bonzini    OP_32_64(add2):
2917139c1837SPaolo Bonzini        if (const_args[4]) {
2918139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
2919139c1837SPaolo Bonzini        } else {
2920139c1837SPaolo Bonzini            tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
2921139c1837SPaolo Bonzini        }
2922139c1837SPaolo Bonzini        if (const_args[5]) {
2923139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
2924139c1837SPaolo Bonzini        } else {
2925139c1837SPaolo Bonzini            tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
2926139c1837SPaolo Bonzini        }
2927139c1837SPaolo Bonzini        break;
2928139c1837SPaolo Bonzini    OP_32_64(sub2):
2929139c1837SPaolo Bonzini        if (const_args[4]) {
2930139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
2931139c1837SPaolo Bonzini        } else {
2932139c1837SPaolo Bonzini            tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
2933139c1837SPaolo Bonzini        }
2934139c1837SPaolo Bonzini        if (const_args[5]) {
2935139c1837SPaolo Bonzini            tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
2936139c1837SPaolo Bonzini        } else {
2937139c1837SPaolo Bonzini            tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
2938139c1837SPaolo Bonzini        }
2939139c1837SPaolo Bonzini        break;
2940139c1837SPaolo Bonzini
2941139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32
2942139c1837SPaolo Bonzini    case INDEX_op_brcond2_i32:
2943139c1837SPaolo Bonzini        tcg_out_brcond2(s, args, const_args, 0);
2944139c1837SPaolo Bonzini        break;
2945139c1837SPaolo Bonzini    case INDEX_op_setcond2_i32:
2946139c1837SPaolo Bonzini        tcg_out_setcond2(s, args, const_args);
2947139c1837SPaolo Bonzini        break;
2948139c1837SPaolo Bonzini#else /* TCG_TARGET_REG_BITS == 64 */
2949139c1837SPaolo Bonzini    case INDEX_op_ld32s_i64:
2950139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
2951139c1837SPaolo Bonzini        break;
2952139c1837SPaolo Bonzini    case INDEX_op_ld_i64:
2953139c1837SPaolo Bonzini        tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
2954139c1837SPaolo Bonzini        break;
2955139c1837SPaolo Bonzini    case INDEX_op_st_i64:
2956139c1837SPaolo Bonzini        if (const_args[0]) {
2957139c1837SPaolo Bonzini            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2958139c1837SPaolo Bonzini            tcg_out32(s, a0);
2959139c1837SPaolo Bonzini        } else {
2960139c1837SPaolo Bonzini            tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
2961139c1837SPaolo Bonzini        }
2962139c1837SPaolo Bonzini        break;
2963139c1837SPaolo Bonzini
2964139c1837SPaolo Bonzini    case INDEX_op_bswap64_i64:
2965139c1837SPaolo Bonzini        tcg_out_bswap64(s, a0);
2966139c1837SPaolo Bonzini        break;
2967139c1837SPaolo Bonzini    case INDEX_op_extrh_i64_i32:
2968139c1837SPaolo Bonzini        tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
2969139c1837SPaolo Bonzini        break;
2970139c1837SPaolo Bonzini#endif
2971139c1837SPaolo Bonzini
2972139c1837SPaolo Bonzini    OP_32_64(deposit):
2973139c1837SPaolo Bonzini        if (args[3] == 0 && args[4] == 8) {
2974139c1837SPaolo Bonzini            /* load bits 0..7 */
297573f97f0aSRichard Henderson            if (const_a2) {
297673f97f0aSRichard Henderson                tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0),
297773f97f0aSRichard Henderson                            0, a0, 0);
297873f97f0aSRichard Henderson                tcg_out8(s, a2);
297973f97f0aSRichard Henderson            } else {
2980139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
298173f97f0aSRichard Henderson            }
298236df88c0SRichard Henderson        } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) {
2983139c1837SPaolo Bonzini            /* load bits 8..15 */
298473f97f0aSRichard Henderson            if (const_a2) {
298573f97f0aSRichard Henderson                tcg_out8(s, OPC_MOVB_Ib + a0 + 4);
298673f97f0aSRichard Henderson                tcg_out8(s, a2);
298773f97f0aSRichard Henderson            } else {
2988139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
298973f97f0aSRichard Henderson            }
2990139c1837SPaolo Bonzini        } else if (args[3] == 0 && args[4] == 16) {
2991139c1837SPaolo Bonzini            /* load bits 0..15 */
299273f97f0aSRichard Henderson            if (const_a2) {
299373f97f0aSRichard Henderson                tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0),
299473f97f0aSRichard Henderson                            0, a0, 0);
299573f97f0aSRichard Henderson                tcg_out16(s, a2);
299673f97f0aSRichard Henderson            } else {
2997139c1837SPaolo Bonzini                tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
299873f97f0aSRichard Henderson            }
2999139c1837SPaolo Bonzini        } else {
3000732e89f4SRichard Henderson            g_assert_not_reached();
3001139c1837SPaolo Bonzini        }
3002139c1837SPaolo Bonzini        break;
3003139c1837SPaolo Bonzini
3004139c1837SPaolo Bonzini    case INDEX_op_extract_i64:
3005139c1837SPaolo Bonzini        if (a2 + args[3] == 32) {
3006139c1837SPaolo Bonzini            /* This is a 32-bit zero-extending right shift.  */
3007139c1837SPaolo Bonzini            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
3008139c1837SPaolo Bonzini            tcg_out_shifti(s, SHIFT_SHR, a0, a2);
3009139c1837SPaolo Bonzini            break;
3010139c1837SPaolo Bonzini        }
3011139c1837SPaolo Bonzini        /* FALLTHRU */
3012139c1837SPaolo Bonzini    case INDEX_op_extract_i32:
3013139c1837SPaolo Bonzini        /* On the off-chance that we can use the high-byte registers.
3014139c1837SPaolo Bonzini           Otherwise we emit the same ext16 + shift pattern that we
3015139c1837SPaolo Bonzini           would have gotten from the normal tcg-op.c expansion.  */
3016139c1837SPaolo Bonzini        tcg_debug_assert(a2 == 8 && args[3] == 8);
3017139c1837SPaolo Bonzini        if (a1 < 4 && a0 < 8) {
3018139c1837SPaolo Bonzini            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
3019139c1837SPaolo Bonzini        } else {
3020139c1837SPaolo Bonzini            tcg_out_ext16u(s, a0, a1);
3021139c1837SPaolo Bonzini            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
3022139c1837SPaolo Bonzini        }
3023139c1837SPaolo Bonzini        break;
3024139c1837SPaolo Bonzini
3025139c1837SPaolo Bonzini    case INDEX_op_sextract_i32:
3026139c1837SPaolo Bonzini        /* We don't implement sextract_i64, as we cannot sign-extend to
3027139c1837SPaolo Bonzini           64-bits without using the REX prefix that explicitly excludes
3028139c1837SPaolo Bonzini           access to the high-byte registers.  */
3029139c1837SPaolo Bonzini        tcg_debug_assert(a2 == 8 && args[3] == 8);
3030139c1837SPaolo Bonzini        if (a1 < 4 && a0 < 8) {
3031139c1837SPaolo Bonzini            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
3032139c1837SPaolo Bonzini        } else {
3033753e42eaSRichard Henderson            tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
3034139c1837SPaolo Bonzini            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
3035139c1837SPaolo Bonzini        }
3036139c1837SPaolo Bonzini        break;
3037139c1837SPaolo Bonzini
3038139c1837SPaolo Bonzini    OP_32_64(extract2):
3039139c1837SPaolo Bonzini        /* Note that SHRD outputs to the r/m operand.  */
3040139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0);
3041139c1837SPaolo Bonzini        tcg_out8(s, args[3]);
3042139c1837SPaolo Bonzini        break;
3043139c1837SPaolo Bonzini
3044139c1837SPaolo Bonzini    case INDEX_op_mb:
3045139c1837SPaolo Bonzini        tcg_out_mb(s, a0);
3046139c1837SPaolo Bonzini        break;
3047139c1837SPaolo Bonzini    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
3048139c1837SPaolo Bonzini    case INDEX_op_mov_i64:
3049139c1837SPaolo Bonzini    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
3050b55a8d9dSRichard Henderson    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
3051cf7d6b8eSRichard Henderson    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
3052678155b2SRichard Henderson    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3053678155b2SRichard Henderson    case INDEX_op_ext8s_i64:
3054d0e66c89SRichard Henderson    case INDEX_op_ext8u_i32:
3055d0e66c89SRichard Henderson    case INDEX_op_ext8u_i64:
3056753e42eaSRichard Henderson    case INDEX_op_ext16s_i32:
3057753e42eaSRichard Henderson    case INDEX_op_ext16s_i64:
3058379afdffSRichard Henderson    case INDEX_op_ext16u_i32:
3059379afdffSRichard Henderson    case INDEX_op_ext16u_i64:
306052bf3398SRichard Henderson    case INDEX_op_ext32s_i64:
30619ecf5f61SRichard Henderson    case INDEX_op_ext32u_i64:
30629c6aa274SRichard Henderson    case INDEX_op_ext_i32_i64:
3063b9bfe000SRichard Henderson    case INDEX_op_extu_i32_i64:
3064b8b94ac6SRichard Henderson    case INDEX_op_extrl_i64_i32:
3065139c1837SPaolo Bonzini    default:
3066732e89f4SRichard Henderson        g_assert_not_reached();
3067139c1837SPaolo Bonzini    }
3068139c1837SPaolo Bonzini
3069139c1837SPaolo Bonzini#undef OP_32_64
3070139c1837SPaolo Bonzini}
3071139c1837SPaolo Bonzini
3072b8a56703SRichard Hendersonstatic int const umin_insn[4] = {
3073b8a56703SRichard Henderson    OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
3074b8a56703SRichard Henderson};
3075b8a56703SRichard Henderson
3076b8a56703SRichard Hendersonstatic int const umax_insn[4] = {
3077b8a56703SRichard Henderson    OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
3078b8a56703SRichard Henderson};
3079b8a56703SRichard Henderson
3080b8a56703SRichard Hendersonstatic bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
3081b8a56703SRichard Henderson                                  TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
3082b8a56703SRichard Henderson{
3083b8a56703SRichard Henderson    static int const cmpeq_insn[4] = {
3084b8a56703SRichard Henderson        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
3085b8a56703SRichard Henderson    };
3086b8a56703SRichard Henderson    static int const cmpgt_insn[4] = {
3087b8a56703SRichard Henderson        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
3088b8a56703SRichard Henderson    };
3089b8a56703SRichard Henderson
3090b8a56703SRichard Henderson    enum {
3091b8a56703SRichard Henderson        NEED_INV  = 1,
3092b8a56703SRichard Henderson        NEED_SWAP = 2,
3093b8a56703SRichard Henderson        NEED_UMIN = 4,
3094b8a56703SRichard Henderson        NEED_UMAX = 8,
3095b8a56703SRichard Henderson        INVALID   = 16,
3096b8a56703SRichard Henderson    };
3097b8a56703SRichard Henderson    static const uint8_t cond_fixup[16] = {
3098b8a56703SRichard Henderson        [0 ... 15] = INVALID,
3099b8a56703SRichard Henderson        [TCG_COND_EQ] = 0,
3100b8a56703SRichard Henderson        [TCG_COND_GT] = 0,
3101b8a56703SRichard Henderson        [TCG_COND_NE] = NEED_INV,
3102b8a56703SRichard Henderson        [TCG_COND_LE] = NEED_INV,
3103b8a56703SRichard Henderson        [TCG_COND_LT] = NEED_SWAP,
3104b8a56703SRichard Henderson        [TCG_COND_GE] = NEED_SWAP | NEED_INV,
3105b8a56703SRichard Henderson        [TCG_COND_LEU] = NEED_UMIN,
3106b8a56703SRichard Henderson        [TCG_COND_GTU] = NEED_UMIN | NEED_INV,
3107b8a56703SRichard Henderson        [TCG_COND_GEU] = NEED_UMAX,
3108b8a56703SRichard Henderson        [TCG_COND_LTU] = NEED_UMAX | NEED_INV,
3109b8a56703SRichard Henderson    };
3110b8a56703SRichard Henderson    int fixup = cond_fixup[cond];
3111b8a56703SRichard Henderson
3112b8a56703SRichard Henderson    assert(!(fixup & INVALID));
3113b8a56703SRichard Henderson
3114b8a56703SRichard Henderson    if (fixup & NEED_INV) {
3115b8a56703SRichard Henderson        cond = tcg_invert_cond(cond);
3116b8a56703SRichard Henderson    }
3117b8a56703SRichard Henderson
3118b8a56703SRichard Henderson    if (fixup & NEED_SWAP) {
3119b8a56703SRichard Henderson        TCGReg swap = v1;
3120b8a56703SRichard Henderson        v1 = v2;
3121b8a56703SRichard Henderson        v2 = swap;
3122b8a56703SRichard Henderson        cond = tcg_swap_cond(cond);
3123b8a56703SRichard Henderson    }
3124b8a56703SRichard Henderson
3125b8a56703SRichard Henderson    if (fixup & (NEED_UMIN | NEED_UMAX)) {
3126b8a56703SRichard Henderson        int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]);
3127b8a56703SRichard Henderson
3128b8a56703SRichard Henderson        /* avx2 does not have 64-bit min/max; adjusted during expand. */
3129b8a56703SRichard Henderson        assert(vece <= MO_32);
3130b8a56703SRichard Henderson
3131b8a56703SRichard Henderson        tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type);
3132b8a56703SRichard Henderson        v2 = TCG_TMP_VEC;
3133b8a56703SRichard Henderson        cond = TCG_COND_EQ;
3134b8a56703SRichard Henderson    }
3135b8a56703SRichard Henderson
3136b8a56703SRichard Henderson    switch (cond) {
3137b8a56703SRichard Henderson    case TCG_COND_EQ:
3138b8a56703SRichard Henderson        tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type);
3139b8a56703SRichard Henderson        break;
3140b8a56703SRichard Henderson    case TCG_COND_GT:
3141b8a56703SRichard Henderson        tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type);
3142b8a56703SRichard Henderson        break;
3143b8a56703SRichard Henderson    default:
3144b8a56703SRichard Henderson        g_assert_not_reached();
3145b8a56703SRichard Henderson    }
3146b8a56703SRichard Henderson    return fixup & NEED_INV;
3147b8a56703SRichard Henderson}
3148b8a56703SRichard Henderson
3149717da87dSRichard Hendersonstatic void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
3150717da87dSRichard Henderson                               TCGReg v1, TCGReg v2, TCGCond cond)
3151717da87dSRichard Henderson{
3152717da87dSRichard Henderson    static const int cmpm_insn[2][4] = {
3153717da87dSRichard Henderson        { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
3154717da87dSRichard Henderson        { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
3155717da87dSRichard Henderson    };
3156*782cffa4SRichard Henderson    static const int testm_insn[4] = {
3157*782cffa4SRichard Henderson        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
3158*782cffa4SRichard Henderson    };
3159*782cffa4SRichard Henderson    static const int testnm_insn[4] = {
3160*782cffa4SRichard Henderson        OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ
3161*782cffa4SRichard Henderson    };
3162*782cffa4SRichard Henderson
3163717da87dSRichard Henderson    static const int cond_ext[16] = {
3164717da87dSRichard Henderson        [TCG_COND_EQ] = 0,
3165717da87dSRichard Henderson        [TCG_COND_NE] = 4,
3166717da87dSRichard Henderson        [TCG_COND_LT] = 1,
3167717da87dSRichard Henderson        [TCG_COND_LTU] = 1,
3168717da87dSRichard Henderson        [TCG_COND_LE] = 2,
3169717da87dSRichard Henderson        [TCG_COND_LEU] = 2,
3170717da87dSRichard Henderson        [TCG_COND_NEVER] = 3,
3171717da87dSRichard Henderson        [TCG_COND_GE] = 5,
3172717da87dSRichard Henderson        [TCG_COND_GEU] = 5,
3173717da87dSRichard Henderson        [TCG_COND_GT] = 6,
3174717da87dSRichard Henderson        [TCG_COND_GTU] = 6,
3175717da87dSRichard Henderson        [TCG_COND_ALWAYS] = 7,
3176717da87dSRichard Henderson    };
3177717da87dSRichard Henderson
3178*782cffa4SRichard Henderson    switch (cond) {
3179*782cffa4SRichard Henderson    case TCG_COND_TSTNE:
3180*782cffa4SRichard Henderson        tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
3181*782cffa4SRichard Henderson        break;
3182*782cffa4SRichard Henderson    case TCG_COND_TSTEQ:
3183*782cffa4SRichard Henderson        tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
3184*782cffa4SRichard Henderson        break;
3185*782cffa4SRichard Henderson    default:
3186717da87dSRichard Henderson        tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
3187717da87dSRichard Henderson                               /* k1 */ 1, v1, v2, type);
3188717da87dSRichard Henderson        tcg_out8(s, cond_ext[cond]);
3189*782cffa4SRichard Henderson        break;
3190*782cffa4SRichard Henderson    }
3191717da87dSRichard Henderson}
3192717da87dSRichard Henderson
3193717da87dSRichard Hendersonstatic void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
3194717da87dSRichard Henderson                              unsigned vece, TCGReg dest)
3195717da87dSRichard Henderson{
3196717da87dSRichard Henderson    static const int movm_insn[] = {
3197717da87dSRichard Henderson        OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q
3198717da87dSRichard Henderson    };
3199717da87dSRichard Henderson    tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
3200717da87dSRichard Henderson}
3201717da87dSRichard Henderson
3202b8a56703SRichard Hendersonstatic void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
3203b8a56703SRichard Henderson                            TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
3204b8a56703SRichard Henderson{
3205717da87dSRichard Henderson    /*
3206717da87dSRichard Henderson     * With avx512, we have a complete set of comparisons into mask.
3207717da87dSRichard Henderson     * Unless there's a single insn expansion for the comparision,
3208717da87dSRichard Henderson     * expand via a mask in k1.
3209717da87dSRichard Henderson     */
3210717da87dSRichard Henderson    if ((vece <= MO_16 ? have_avx512bw : have_avx512dq)
3211717da87dSRichard Henderson        && cond != TCG_COND_EQ
3212717da87dSRichard Henderson        && cond != TCG_COND_LT
3213717da87dSRichard Henderson        && cond != TCG_COND_GT) {
3214717da87dSRichard Henderson        tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
3215717da87dSRichard Henderson        tcg_out_k1_to_vec(s, type, vece, v0);
3216717da87dSRichard Henderson        return;
3217717da87dSRichard Henderson    }
3218717da87dSRichard Henderson
3219b8a56703SRichard Henderson    if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
3220b8a56703SRichard Henderson        tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
3221b8a56703SRichard Henderson        tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
3222b8a56703SRichard Henderson    }
3223b8a56703SRichard Henderson}
3224b8a56703SRichard Henderson
3225d5896749SRichard Hendersonstatic void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece,
3226d5896749SRichard Henderson                                  TCGReg v0, TCGReg c1, TCGReg c2,
3227d5896749SRichard Henderson                                  TCGReg v3, TCGReg v4, TCGCond cond)
3228d5896749SRichard Henderson{
3229d5896749SRichard Henderson    static const int vpblendm_insn[] = {
3230d5896749SRichard Henderson        OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ
3231d5896749SRichard Henderson    };
3232d5896749SRichard Henderson    bool z = false;
3233d5896749SRichard Henderson
3234d5896749SRichard Henderson    /* Swap to place constant in V4 to take advantage of zero-masking. */
3235d5896749SRichard Henderson    if (!v3) {
3236d5896749SRichard Henderson        z = true;
3237d5896749SRichard Henderson        v3 = v4;
3238d5896749SRichard Henderson        cond = tcg_invert_cond(cond);
3239d5896749SRichard Henderson    }
3240d5896749SRichard Henderson
3241d5896749SRichard Henderson    tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond);
3242d5896749SRichard Henderson    tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3,
3243d5896749SRichard Henderson                            /* k1 */1, z, type);
3244d5896749SRichard Henderson}
3245d5896749SRichard Henderson
3246db4121d2SRichard Hendersonstatic void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece,
3247db4121d2SRichard Henderson                               TCGReg v0, TCGReg c1, TCGReg c2,
3248db4121d2SRichard Henderson                               TCGReg v3, TCGReg v4, TCGCond cond)
3249db4121d2SRichard Henderson{
3250d5896749SRichard Henderson    bool inv;
3251d5896749SRichard Henderson
3252d5896749SRichard Henderson    if (vece <= MO_16 ? have_avx512bw : have_avx512vl) {
3253d5896749SRichard Henderson        tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond);
3254d5896749SRichard Henderson        return;
3255d5896749SRichard Henderson    }
3256d5896749SRichard Henderson
3257d5896749SRichard Henderson    inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond);
3258d8387f0eSRichard Henderson
3259d8387f0eSRichard Henderson    /*
3260d8387f0eSRichard Henderson     * Since XMM0 is 16, the only way we get 0 into V3
3261d8387f0eSRichard Henderson     * is via the constant zero constraint.
3262d8387f0eSRichard Henderson     */
3263d8387f0eSRichard Henderson    if (!v3) {
3264d8387f0eSRichard Henderson        if (inv) {
3265d8387f0eSRichard Henderson            tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type);
3266d8387f0eSRichard Henderson        } else {
3267d8387f0eSRichard Henderson            tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type);
3268d8387f0eSRichard Henderson        }
3269d8387f0eSRichard Henderson    } else {
3270d8387f0eSRichard Henderson        if (inv) {
3271db4121d2SRichard Henderson            TCGReg swap = v3;
3272db4121d2SRichard Henderson            v3 = v4;
3273db4121d2SRichard Henderson            v4 = swap;
3274db4121d2SRichard Henderson        }
3275db4121d2SRichard Henderson        tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type);
3276db4121d2SRichard Henderson        tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
3277db4121d2SRichard Henderson    }
3278d8387f0eSRichard Henderson}
3279db4121d2SRichard Henderson
3280139c1837SPaolo Bonzinistatic void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3281139c1837SPaolo Bonzini                           unsigned vecl, unsigned vece,
32825e8892dbSMiroslav Rezanina                           const TCGArg args[TCG_MAX_OP_ARGS],
32835e8892dbSMiroslav Rezanina                           const int const_args[TCG_MAX_OP_ARGS])
3284139c1837SPaolo Bonzini{
3285139c1837SPaolo Bonzini    static int const add_insn[4] = {
3286139c1837SPaolo Bonzini        OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
3287139c1837SPaolo Bonzini    };
3288139c1837SPaolo Bonzini    static int const ssadd_insn[4] = {
3289139c1837SPaolo Bonzini        OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2
3290139c1837SPaolo Bonzini    };
3291139c1837SPaolo Bonzini    static int const usadd_insn[4] = {
3292139c1837SPaolo Bonzini        OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2
3293139c1837SPaolo Bonzini    };
3294139c1837SPaolo Bonzini    static int const sub_insn[4] = {
3295139c1837SPaolo Bonzini        OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
3296139c1837SPaolo Bonzini    };
3297139c1837SPaolo Bonzini    static int const sssub_insn[4] = {
3298139c1837SPaolo Bonzini        OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2
3299139c1837SPaolo Bonzini    };
3300139c1837SPaolo Bonzini    static int const ussub_insn[4] = {
3301139c1837SPaolo Bonzini        OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
3302139c1837SPaolo Bonzini    };
3303139c1837SPaolo Bonzini    static int const mul_insn[4] = {
33044c8b9686SRichard Henderson        OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ
3305139c1837SPaolo Bonzini    };
3306139c1837SPaolo Bonzini    static int const shift_imm_insn[4] = {
3307139c1837SPaolo Bonzini        OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
3308139c1837SPaolo Bonzini    };
3309139c1837SPaolo Bonzini    static int const punpckl_insn[4] = {
3310139c1837SPaolo Bonzini        OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
3311139c1837SPaolo Bonzini    };
3312139c1837SPaolo Bonzini    static int const punpckh_insn[4] = {
3313139c1837SPaolo Bonzini        OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
3314139c1837SPaolo Bonzini    };
3315139c1837SPaolo Bonzini    static int const packss_insn[4] = {
3316139c1837SPaolo Bonzini        OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
3317139c1837SPaolo Bonzini    };
3318139c1837SPaolo Bonzini    static int const packus_insn[4] = {
3319139c1837SPaolo Bonzini        OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
3320139c1837SPaolo Bonzini    };
3321139c1837SPaolo Bonzini    static int const smin_insn[4] = {
3322dac1648fSRichard Henderson        OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ
3323139c1837SPaolo Bonzini    };
3324139c1837SPaolo Bonzini    static int const smax_insn[4] = {
3325dac1648fSRichard Henderson        OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
3326139c1837SPaolo Bonzini    };
3327102cd35cSRichard Henderson    static int const rotlv_insn[4] = {
3328102cd35cSRichard Henderson        OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
3329102cd35cSRichard Henderson    };
3330102cd35cSRichard Henderson    static int const rotrv_insn[4] = {
3331102cd35cSRichard Henderson        OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ
3332102cd35cSRichard Henderson    };
3333139c1837SPaolo Bonzini    static int const shlv_insn[4] = {
3334ef77ce0dSRichard Henderson        OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
3335139c1837SPaolo Bonzini    };
3336139c1837SPaolo Bonzini    static int const shrv_insn[4] = {
3337ef77ce0dSRichard Henderson        OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ
3338139c1837SPaolo Bonzini    };
3339139c1837SPaolo Bonzini    static int const sarv_insn[4] = {
3340ef77ce0dSRichard Henderson        OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ
3341139c1837SPaolo Bonzini    };
3342139c1837SPaolo Bonzini    static int const shls_insn[4] = {
3343139c1837SPaolo Bonzini        OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
3344139c1837SPaolo Bonzini    };
3345139c1837SPaolo Bonzini    static int const shrs_insn[4] = {
3346139c1837SPaolo Bonzini        OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
3347139c1837SPaolo Bonzini    };
3348139c1837SPaolo Bonzini    static int const sars_insn[4] = {
334947b331b2SRichard Henderson        OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
3350139c1837SPaolo Bonzini    };
3351965d5d06SRichard Henderson    static int const vpshldi_insn[4] = {
3352965d5d06SRichard Henderson        OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
3353965d5d06SRichard Henderson    };
3354965d5d06SRichard Henderson    static int const vpshldv_insn[4] = {
3355965d5d06SRichard Henderson        OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
3356965d5d06SRichard Henderson    };
3357965d5d06SRichard Henderson    static int const vpshrdv_insn[4] = {
3358965d5d06SRichard Henderson        OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
3359965d5d06SRichard Henderson    };
3360139c1837SPaolo Bonzini    static int const abs_insn[4] = {
3361dac1648fSRichard Henderson        OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ
3362139c1837SPaolo Bonzini    };
3363139c1837SPaolo Bonzini
3364139c1837SPaolo Bonzini    TCGType type = vecl + TCG_TYPE_V64;
3365139c1837SPaolo Bonzini    int insn, sub;
3366cf320769SRichard Henderson    TCGArg a0, a1, a2, a3;
3367139c1837SPaolo Bonzini
3368139c1837SPaolo Bonzini    a0 = args[0];
3369139c1837SPaolo Bonzini    a1 = args[1];
3370139c1837SPaolo Bonzini    a2 = args[2];
3371139c1837SPaolo Bonzini
3372139c1837SPaolo Bonzini    switch (opc) {
3373139c1837SPaolo Bonzini    case INDEX_op_add_vec:
3374139c1837SPaolo Bonzini        insn = add_insn[vece];
3375139c1837SPaolo Bonzini        goto gen_simd;
3376139c1837SPaolo Bonzini    case INDEX_op_ssadd_vec:
3377139c1837SPaolo Bonzini        insn = ssadd_insn[vece];
3378139c1837SPaolo Bonzini        goto gen_simd;
3379139c1837SPaolo Bonzini    case INDEX_op_usadd_vec:
3380139c1837SPaolo Bonzini        insn = usadd_insn[vece];
3381139c1837SPaolo Bonzini        goto gen_simd;
3382139c1837SPaolo Bonzini    case INDEX_op_sub_vec:
3383139c1837SPaolo Bonzini        insn = sub_insn[vece];
3384139c1837SPaolo Bonzini        goto gen_simd;
3385139c1837SPaolo Bonzini    case INDEX_op_sssub_vec:
3386139c1837SPaolo Bonzini        insn = sssub_insn[vece];
3387139c1837SPaolo Bonzini        goto gen_simd;
3388139c1837SPaolo Bonzini    case INDEX_op_ussub_vec:
3389139c1837SPaolo Bonzini        insn = ussub_insn[vece];
3390139c1837SPaolo Bonzini        goto gen_simd;
3391139c1837SPaolo Bonzini    case INDEX_op_mul_vec:
3392139c1837SPaolo Bonzini        insn = mul_insn[vece];
3393139c1837SPaolo Bonzini        goto gen_simd;
3394139c1837SPaolo Bonzini    case INDEX_op_and_vec:
3395139c1837SPaolo Bonzini        insn = OPC_PAND;
3396139c1837SPaolo Bonzini        goto gen_simd;
3397139c1837SPaolo Bonzini    case INDEX_op_or_vec:
3398139c1837SPaolo Bonzini        insn = OPC_POR;
3399139c1837SPaolo Bonzini        goto gen_simd;
3400139c1837SPaolo Bonzini    case INDEX_op_xor_vec:
3401139c1837SPaolo Bonzini        insn = OPC_PXOR;
3402139c1837SPaolo Bonzini        goto gen_simd;
3403139c1837SPaolo Bonzini    case INDEX_op_smin_vec:
3404139c1837SPaolo Bonzini        insn = smin_insn[vece];
3405139c1837SPaolo Bonzini        goto gen_simd;
3406139c1837SPaolo Bonzini    case INDEX_op_umin_vec:
3407139c1837SPaolo Bonzini        insn = umin_insn[vece];
3408139c1837SPaolo Bonzini        goto gen_simd;
3409139c1837SPaolo Bonzini    case INDEX_op_smax_vec:
3410139c1837SPaolo Bonzini        insn = smax_insn[vece];
3411139c1837SPaolo Bonzini        goto gen_simd;
3412139c1837SPaolo Bonzini    case INDEX_op_umax_vec:
3413139c1837SPaolo Bonzini        insn = umax_insn[vece];
3414139c1837SPaolo Bonzini        goto gen_simd;
3415139c1837SPaolo Bonzini    case INDEX_op_shlv_vec:
3416139c1837SPaolo Bonzini        insn = shlv_insn[vece];
3417139c1837SPaolo Bonzini        goto gen_simd;
3418139c1837SPaolo Bonzini    case INDEX_op_shrv_vec:
3419139c1837SPaolo Bonzini        insn = shrv_insn[vece];
3420139c1837SPaolo Bonzini        goto gen_simd;
3421139c1837SPaolo Bonzini    case INDEX_op_sarv_vec:
3422139c1837SPaolo Bonzini        insn = sarv_insn[vece];
3423139c1837SPaolo Bonzini        goto gen_simd;
3424102cd35cSRichard Henderson    case INDEX_op_rotlv_vec:
3425102cd35cSRichard Henderson        insn = rotlv_insn[vece];
3426102cd35cSRichard Henderson        goto gen_simd;
3427102cd35cSRichard Henderson    case INDEX_op_rotrv_vec:
3428102cd35cSRichard Henderson        insn = rotrv_insn[vece];
3429102cd35cSRichard Henderson        goto gen_simd;
3430139c1837SPaolo Bonzini    case INDEX_op_shls_vec:
3431139c1837SPaolo Bonzini        insn = shls_insn[vece];
3432139c1837SPaolo Bonzini        goto gen_simd;
3433139c1837SPaolo Bonzini    case INDEX_op_shrs_vec:
3434139c1837SPaolo Bonzini        insn = shrs_insn[vece];
3435139c1837SPaolo Bonzini        goto gen_simd;
3436139c1837SPaolo Bonzini    case INDEX_op_sars_vec:
3437139c1837SPaolo Bonzini        insn = sars_insn[vece];
3438139c1837SPaolo Bonzini        goto gen_simd;
3439139c1837SPaolo Bonzini    case INDEX_op_x86_punpckl_vec:
3440139c1837SPaolo Bonzini        insn = punpckl_insn[vece];
3441139c1837SPaolo Bonzini        goto gen_simd;
3442139c1837SPaolo Bonzini    case INDEX_op_x86_punpckh_vec:
3443139c1837SPaolo Bonzini        insn = punpckh_insn[vece];
3444139c1837SPaolo Bonzini        goto gen_simd;
3445139c1837SPaolo Bonzini    case INDEX_op_x86_packss_vec:
3446139c1837SPaolo Bonzini        insn = packss_insn[vece];
3447139c1837SPaolo Bonzini        goto gen_simd;
3448139c1837SPaolo Bonzini    case INDEX_op_x86_packus_vec:
3449139c1837SPaolo Bonzini        insn = packus_insn[vece];
3450139c1837SPaolo Bonzini        goto gen_simd;
3451965d5d06SRichard Henderson    case INDEX_op_x86_vpshldv_vec:
3452965d5d06SRichard Henderson        insn = vpshldv_insn[vece];
3453965d5d06SRichard Henderson        a1 = a2;
3454965d5d06SRichard Henderson        a2 = args[3];
3455965d5d06SRichard Henderson        goto gen_simd;
3456965d5d06SRichard Henderson    case INDEX_op_x86_vpshrdv_vec:
3457965d5d06SRichard Henderson        insn = vpshrdv_insn[vece];
3458965d5d06SRichard Henderson        a1 = a2;
3459965d5d06SRichard Henderson        a2 = args[3];
3460965d5d06SRichard Henderson        goto gen_simd;
3461139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32
3462139c1837SPaolo Bonzini    case INDEX_op_dup2_vec:
3463139c1837SPaolo Bonzini        /* First merge the two 32-bit inputs to a single 64-bit element. */
3464139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2);
3465139c1837SPaolo Bonzini        /* Then replicate the 64-bit elements across the rest of the vector. */
3466139c1837SPaolo Bonzini        if (type != TCG_TYPE_V64) {
3467139c1837SPaolo Bonzini            tcg_out_dup_vec(s, type, MO_64, a0, a0);
3468139c1837SPaolo Bonzini        }
3469139c1837SPaolo Bonzini        break;
3470139c1837SPaolo Bonzini#endif
3471139c1837SPaolo Bonzini    case INDEX_op_abs_vec:
3472139c1837SPaolo Bonzini        insn = abs_insn[vece];
3473139c1837SPaolo Bonzini        a2 = a1;
3474139c1837SPaolo Bonzini        a1 = 0;
3475139c1837SPaolo Bonzini        goto gen_simd;
3476139c1837SPaolo Bonzini    gen_simd:
3477139c1837SPaolo Bonzini        tcg_debug_assert(insn != OPC_UD2);
3478bc97b3adSRichard Henderson        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
3479139c1837SPaolo Bonzini        break;
3480139c1837SPaolo Bonzini
3481139c1837SPaolo Bonzini    case INDEX_op_cmp_vec:
3482b8a56703SRichard Henderson        tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]);
3483b8a56703SRichard Henderson        break;
3484139c1837SPaolo Bonzini
3485db4121d2SRichard Henderson    case INDEX_op_cmpsel_vec:
3486db4121d2SRichard Henderson        tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2,
3487db4121d2SRichard Henderson                           args[3], args[4], args[5]);
3488db4121d2SRichard Henderson        break;
3489db4121d2SRichard Henderson
3490139c1837SPaolo Bonzini    case INDEX_op_andc_vec:
3491139c1837SPaolo Bonzini        insn = OPC_PANDN;
3492bc97b3adSRichard Henderson        tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type);
3493139c1837SPaolo Bonzini        break;
3494139c1837SPaolo Bonzini
3495139c1837SPaolo Bonzini    case INDEX_op_shli_vec:
3496264e4182SRichard Henderson        insn = shift_imm_insn[vece];
3497139c1837SPaolo Bonzini        sub = 6;
3498139c1837SPaolo Bonzini        goto gen_shift;
3499139c1837SPaolo Bonzini    case INDEX_op_shri_vec:
3500264e4182SRichard Henderson        insn = shift_imm_insn[vece];
3501139c1837SPaolo Bonzini        sub = 2;
3502139c1837SPaolo Bonzini        goto gen_shift;
3503139c1837SPaolo Bonzini    case INDEX_op_sari_vec:
3504264e4182SRichard Henderson        if (vece == MO_64) {
3505264e4182SRichard Henderson            insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX;
3506264e4182SRichard Henderson        } else {
3507264e4182SRichard Henderson            insn = shift_imm_insn[vece];
3508264e4182SRichard Henderson        }
3509139c1837SPaolo Bonzini        sub = 4;
35104e73f842SRichard Henderson        goto gen_shift;
35114e73f842SRichard Henderson    case INDEX_op_rotli_vec:
35124e73f842SRichard Henderson        insn = OPC_PSHIFTD_Ib | P_EVEX;  /* VPROL[DQ] */
35134e73f842SRichard Henderson        if (vece == MO_64) {
35144e73f842SRichard Henderson            insn |= P_VEXW;
35154e73f842SRichard Henderson        }
35164e73f842SRichard Henderson        sub = 1;
35174e73f842SRichard Henderson        goto gen_shift;
3518139c1837SPaolo Bonzini    gen_shift:
3519139c1837SPaolo Bonzini        tcg_debug_assert(vece != MO_8);
3520bc97b3adSRichard Henderson        tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type);
3521139c1837SPaolo Bonzini        tcg_out8(s, a2);
3522139c1837SPaolo Bonzini        break;
3523139c1837SPaolo Bonzini
3524139c1837SPaolo Bonzini    case INDEX_op_ld_vec:
3525139c1837SPaolo Bonzini        tcg_out_ld(s, type, a0, a1, a2);
3526139c1837SPaolo Bonzini        break;
3527139c1837SPaolo Bonzini    case INDEX_op_st_vec:
3528139c1837SPaolo Bonzini        tcg_out_st(s, type, a0, a1, a2);
3529139c1837SPaolo Bonzini        break;
3530139c1837SPaolo Bonzini    case INDEX_op_dupm_vec:
3531139c1837SPaolo Bonzini        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3532139c1837SPaolo Bonzini        break;
3533139c1837SPaolo Bonzini
3534139c1837SPaolo Bonzini    case INDEX_op_x86_shufps_vec:
3535139c1837SPaolo Bonzini        insn = OPC_SHUFPS;
3536139c1837SPaolo Bonzini        sub = args[3];
3537139c1837SPaolo Bonzini        goto gen_simd_imm8;
3538139c1837SPaolo Bonzini    case INDEX_op_x86_blend_vec:
3539139c1837SPaolo Bonzini        if (vece == MO_16) {
3540139c1837SPaolo Bonzini            insn = OPC_PBLENDW;
3541139c1837SPaolo Bonzini        } else if (vece == MO_32) {
3542139c1837SPaolo Bonzini            insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
3543139c1837SPaolo Bonzini        } else {
3544139c1837SPaolo Bonzini            g_assert_not_reached();
3545139c1837SPaolo Bonzini        }
3546139c1837SPaolo Bonzini        sub = args[3];
3547139c1837SPaolo Bonzini        goto gen_simd_imm8;
3548139c1837SPaolo Bonzini    case INDEX_op_x86_vperm2i128_vec:
3549139c1837SPaolo Bonzini        insn = OPC_VPERM2I128;
3550139c1837SPaolo Bonzini        sub = args[3];
3551139c1837SPaolo Bonzini        goto gen_simd_imm8;
3552965d5d06SRichard Henderson    case INDEX_op_x86_vpshldi_vec:
3553965d5d06SRichard Henderson        insn = vpshldi_insn[vece];
3554965d5d06SRichard Henderson        sub = args[3];
3555965d5d06SRichard Henderson        goto gen_simd_imm8;
35563143767bSRichard Henderson
35573143767bSRichard Henderson    case INDEX_op_not_vec:
35583143767bSRichard Henderson        insn = OPC_VPTERNLOGQ;
35593143767bSRichard Henderson        a2 = a1;
35603143767bSRichard Henderson        sub = 0x33; /* !B */
35613143767bSRichard Henderson        goto gen_simd_imm8;
35623143767bSRichard Henderson    case INDEX_op_nor_vec:
35633143767bSRichard Henderson        insn = OPC_VPTERNLOGQ;
35643143767bSRichard Henderson        sub = 0x11; /* norCB */
35653143767bSRichard Henderson        goto gen_simd_imm8;
35663143767bSRichard Henderson    case INDEX_op_nand_vec:
35673143767bSRichard Henderson        insn = OPC_VPTERNLOGQ;
35683143767bSRichard Henderson        sub = 0x77; /* nandCB */
35693143767bSRichard Henderson        goto gen_simd_imm8;
35703143767bSRichard Henderson    case INDEX_op_eqv_vec:
35713143767bSRichard Henderson        insn = OPC_VPTERNLOGQ;
35723143767bSRichard Henderson        sub = 0x99; /* xnorCB */
35733143767bSRichard Henderson        goto gen_simd_imm8;
35743143767bSRichard Henderson    case INDEX_op_orc_vec:
35753143767bSRichard Henderson        insn = OPC_VPTERNLOGQ;
35763143767bSRichard Henderson        sub = 0xdd; /* orB!C */
35773143767bSRichard Henderson        goto gen_simd_imm8;
35783143767bSRichard Henderson
3579cf320769SRichard Henderson    case INDEX_op_bitsel_vec:
3580cf320769SRichard Henderson        insn = OPC_VPTERNLOGQ;
3581cf320769SRichard Henderson        a3 = args[3];
3582cf320769SRichard Henderson        if (a0 == a1) {
3583cf320769SRichard Henderson            a1 = a2;
3584cf320769SRichard Henderson            a2 = a3;
3585cf320769SRichard Henderson            sub = 0xca; /* A?B:C */
3586cf320769SRichard Henderson        } else if (a0 == a2) {
3587cf320769SRichard Henderson            a2 = a3;
3588cf320769SRichard Henderson            sub = 0xe2; /* B?A:C */
3589cf320769SRichard Henderson        } else {
3590cf320769SRichard Henderson            tcg_out_mov(s, type, a0, a3);
3591cf320769SRichard Henderson            sub = 0xb8; /* B?C:A */
3592cf320769SRichard Henderson        }
3593cf320769SRichard Henderson        goto gen_simd_imm8;
3594cf320769SRichard Henderson
3595139c1837SPaolo Bonzini    gen_simd_imm8:
3596965d5d06SRichard Henderson        tcg_debug_assert(insn != OPC_UD2);
3597bc97b3adSRichard Henderson        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
3598139c1837SPaolo Bonzini        tcg_out8(s, sub);
3599139c1837SPaolo Bonzini        break;
3600139c1837SPaolo Bonzini
3601139c1837SPaolo Bonzini    case INDEX_op_x86_psrldq_vec:
3602139c1837SPaolo Bonzini        tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
3603139c1837SPaolo Bonzini        tcg_out8(s, a2);
3604139c1837SPaolo Bonzini        break;
3605139c1837SPaolo Bonzini
3606139c1837SPaolo Bonzini    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3607139c1837SPaolo Bonzini    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3608139c1837SPaolo Bonzini    default:
3609139c1837SPaolo Bonzini        g_assert_not_reached();
3610139c1837SPaolo Bonzini    }
3611139c1837SPaolo Bonzini}
3612139c1837SPaolo Bonzini
36134c22e840SRichard Hendersonstatic TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3614139c1837SPaolo Bonzini{
3615139c1837SPaolo Bonzini    switch (op) {
3616139c1837SPaolo Bonzini    case INDEX_op_goto_ptr:
36174c22e840SRichard Henderson        return C_O0_I1(r);
3618139c1837SPaolo Bonzini
3619139c1837SPaolo Bonzini    case INDEX_op_ld8u_i32:
3620139c1837SPaolo Bonzini    case INDEX_op_ld8u_i64:
3621139c1837SPaolo Bonzini    case INDEX_op_ld8s_i32:
3622139c1837SPaolo Bonzini    case INDEX_op_ld8s_i64:
3623139c1837SPaolo Bonzini    case INDEX_op_ld16u_i32:
3624139c1837SPaolo Bonzini    case INDEX_op_ld16u_i64:
3625139c1837SPaolo Bonzini    case INDEX_op_ld16s_i32:
3626139c1837SPaolo Bonzini    case INDEX_op_ld16s_i64:
3627139c1837SPaolo Bonzini    case INDEX_op_ld_i32:
3628139c1837SPaolo Bonzini    case INDEX_op_ld32u_i64:
3629139c1837SPaolo Bonzini    case INDEX_op_ld32s_i64:
3630139c1837SPaolo Bonzini    case INDEX_op_ld_i64:
36314c22e840SRichard Henderson        return C_O1_I1(r, r);
3632139c1837SPaolo Bonzini
3633139c1837SPaolo Bonzini    case INDEX_op_st8_i32:
3634139c1837SPaolo Bonzini    case INDEX_op_st8_i64:
36354c22e840SRichard Henderson        return C_O0_I2(qi, r);
36364c22e840SRichard Henderson
3637139c1837SPaolo Bonzini    case INDEX_op_st16_i32:
3638139c1837SPaolo Bonzini    case INDEX_op_st16_i64:
3639139c1837SPaolo Bonzini    case INDEX_op_st_i32:
3640139c1837SPaolo Bonzini    case INDEX_op_st32_i64:
36414c22e840SRichard Henderson        return C_O0_I2(ri, r);
36424c22e840SRichard Henderson
3643139c1837SPaolo Bonzini    case INDEX_op_st_i64:
36444c22e840SRichard Henderson        return C_O0_I2(re, r);
3645139c1837SPaolo Bonzini
3646139c1837SPaolo Bonzini    case INDEX_op_add_i32:
3647139c1837SPaolo Bonzini    case INDEX_op_add_i64:
36484c22e840SRichard Henderson        return C_O1_I2(r, r, re);
36494c22e840SRichard Henderson
3650139c1837SPaolo Bonzini    case INDEX_op_sub_i32:
3651139c1837SPaolo Bonzini    case INDEX_op_sub_i64:
3652139c1837SPaolo Bonzini    case INDEX_op_mul_i32:
3653139c1837SPaolo Bonzini    case INDEX_op_mul_i64:
3654139c1837SPaolo Bonzini    case INDEX_op_or_i32:
3655139c1837SPaolo Bonzini    case INDEX_op_or_i64:
3656139c1837SPaolo Bonzini    case INDEX_op_xor_i32:
3657139c1837SPaolo Bonzini    case INDEX_op_xor_i64:
36584c22e840SRichard Henderson        return C_O1_I2(r, 0, re);
3659139c1837SPaolo Bonzini
3660139c1837SPaolo Bonzini    case INDEX_op_and_i32:
3661139c1837SPaolo Bonzini    case INDEX_op_and_i64:
36624c22e840SRichard Henderson        return C_O1_I2(r, 0, reZ);
36634c22e840SRichard Henderson
3664139c1837SPaolo Bonzini    case INDEX_op_andc_i32:
3665139c1837SPaolo Bonzini    case INDEX_op_andc_i64:
36664c22e840SRichard Henderson        return C_O1_I2(r, r, rI);
3667139c1837SPaolo Bonzini
3668139c1837SPaolo Bonzini    case INDEX_op_shl_i32:
3669139c1837SPaolo Bonzini    case INDEX_op_shl_i64:
3670139c1837SPaolo Bonzini    case INDEX_op_shr_i32:
3671139c1837SPaolo Bonzini    case INDEX_op_shr_i64:
3672139c1837SPaolo Bonzini    case INDEX_op_sar_i32:
3673139c1837SPaolo Bonzini    case INDEX_op_sar_i64:
36744c22e840SRichard Henderson        return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci);
36754c22e840SRichard Henderson
3676139c1837SPaolo Bonzini    case INDEX_op_rotl_i32:
3677139c1837SPaolo Bonzini    case INDEX_op_rotl_i64:
3678139c1837SPaolo Bonzini    case INDEX_op_rotr_i32:
3679139c1837SPaolo Bonzini    case INDEX_op_rotr_i64:
36804c22e840SRichard Henderson        return C_O1_I2(r, 0, ci);
3681139c1837SPaolo Bonzini
3682139c1837SPaolo Bonzini    case INDEX_op_brcond_i32:
3683139c1837SPaolo Bonzini    case INDEX_op_brcond_i64:
3684d3d1c30cSRichard Henderson        return C_O0_I2(r, reT);
3685139c1837SPaolo Bonzini
3686139c1837SPaolo Bonzini    case INDEX_op_bswap16_i32:
3687139c1837SPaolo Bonzini    case INDEX_op_bswap16_i64:
3688139c1837SPaolo Bonzini    case INDEX_op_bswap32_i32:
3689139c1837SPaolo Bonzini    case INDEX_op_bswap32_i64:
3690139c1837SPaolo Bonzini    case INDEX_op_bswap64_i64:
3691139c1837SPaolo Bonzini    case INDEX_op_neg_i32:
3692139c1837SPaolo Bonzini    case INDEX_op_neg_i64:
3693139c1837SPaolo Bonzini    case INDEX_op_not_i32:
3694139c1837SPaolo Bonzini    case INDEX_op_not_i64:
3695139c1837SPaolo Bonzini    case INDEX_op_extrh_i64_i32:
36964c22e840SRichard Henderson        return C_O1_I1(r, 0);
3697139c1837SPaolo Bonzini
3698139c1837SPaolo Bonzini    case INDEX_op_ext8s_i32:
3699139c1837SPaolo Bonzini    case INDEX_op_ext8s_i64:
3700139c1837SPaolo Bonzini    case INDEX_op_ext8u_i32:
3701139c1837SPaolo Bonzini    case INDEX_op_ext8u_i64:
37024c22e840SRichard Henderson        return C_O1_I1(r, q);
37034c22e840SRichard Henderson
3704139c1837SPaolo Bonzini    case INDEX_op_ext16s_i32:
3705139c1837SPaolo Bonzini    case INDEX_op_ext16s_i64:
3706139c1837SPaolo Bonzini    case INDEX_op_ext16u_i32:
3707139c1837SPaolo Bonzini    case INDEX_op_ext16u_i64:
3708139c1837SPaolo Bonzini    case INDEX_op_ext32s_i64:
3709139c1837SPaolo Bonzini    case INDEX_op_ext32u_i64:
3710139c1837SPaolo Bonzini    case INDEX_op_ext_i32_i64:
3711139c1837SPaolo Bonzini    case INDEX_op_extu_i32_i64:
3712139c1837SPaolo Bonzini    case INDEX_op_extrl_i64_i32:
3713139c1837SPaolo Bonzini    case INDEX_op_extract_i32:
3714139c1837SPaolo Bonzini    case INDEX_op_extract_i64:
3715139c1837SPaolo Bonzini    case INDEX_op_sextract_i32:
3716139c1837SPaolo Bonzini    case INDEX_op_ctpop_i32:
3717139c1837SPaolo Bonzini    case INDEX_op_ctpop_i64:
37184c22e840SRichard Henderson        return C_O1_I1(r, r);
37194c22e840SRichard Henderson
3720139c1837SPaolo Bonzini    case INDEX_op_extract2_i32:
3721139c1837SPaolo Bonzini    case INDEX_op_extract2_i64:
37224c22e840SRichard Henderson        return C_O1_I2(r, 0, r);
3723139c1837SPaolo Bonzini
3724139c1837SPaolo Bonzini    case INDEX_op_deposit_i32:
3725139c1837SPaolo Bonzini    case INDEX_op_deposit_i64:
372673f97f0aSRichard Henderson        return C_O1_I2(q, 0, qi);
37274c22e840SRichard Henderson
3728139c1837SPaolo Bonzini    case INDEX_op_setcond_i32:
3729139c1837SPaolo Bonzini    case INDEX_op_setcond_i64:
373095bf306eSRichard Henderson    case INDEX_op_negsetcond_i32:
373195bf306eSRichard Henderson    case INDEX_op_negsetcond_i64:
3732d3d1c30cSRichard Henderson        return C_O1_I2(q, r, reT);
37334c22e840SRichard Henderson
3734139c1837SPaolo Bonzini    case INDEX_op_movcond_i32:
3735139c1837SPaolo Bonzini    case INDEX_op_movcond_i64:
3736d3d1c30cSRichard Henderson        return C_O1_I4(r, r, reT, r, 0);
37374c22e840SRichard Henderson
3738139c1837SPaolo Bonzini    case INDEX_op_div2_i32:
3739139c1837SPaolo Bonzini    case INDEX_op_div2_i64:
3740139c1837SPaolo Bonzini    case INDEX_op_divu2_i32:
3741139c1837SPaolo Bonzini    case INDEX_op_divu2_i64:
37424c22e840SRichard Henderson        return C_O2_I3(a, d, 0, 1, r);
37434c22e840SRichard Henderson
3744139c1837SPaolo Bonzini    case INDEX_op_mulu2_i32:
3745139c1837SPaolo Bonzini    case INDEX_op_mulu2_i64:
3746139c1837SPaolo Bonzini    case INDEX_op_muls2_i32:
3747139c1837SPaolo Bonzini    case INDEX_op_muls2_i64:
37484c22e840SRichard Henderson        return C_O2_I2(a, d, a, r);
37494c22e840SRichard Henderson
3750139c1837SPaolo Bonzini    case INDEX_op_add2_i32:
3751139c1837SPaolo Bonzini    case INDEX_op_add2_i64:
3752139c1837SPaolo Bonzini    case INDEX_op_sub2_i32:
3753139c1837SPaolo Bonzini    case INDEX_op_sub2_i64:
375422d2e535SIlya Leoshkevich        return C_N1_O1_I4(r, r, 0, 1, re, re);
37554c22e840SRichard Henderson
3756139c1837SPaolo Bonzini    case INDEX_op_ctz_i32:
3757139c1837SPaolo Bonzini    case INDEX_op_ctz_i64:
37584c22e840SRichard Henderson        return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
37594c22e840SRichard Henderson
3760139c1837SPaolo Bonzini    case INDEX_op_clz_i32:
3761139c1837SPaolo Bonzini    case INDEX_op_clz_i64:
37624c22e840SRichard Henderson        return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
3763139c1837SPaolo Bonzini
3764fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a32_i32:
3765fecccfccSRichard Henderson        return C_O1_I1(r, L);
3766fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a64_i32:
3767fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
37684c22e840SRichard Henderson
3769fecccfccSRichard Henderson    case INDEX_op_qemu_st_a32_i32:
3770fecccfccSRichard Henderson        return C_O0_I2(L, L);
3771fecccfccSRichard Henderson    case INDEX_op_qemu_st_a64_i32:
3772fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
3773fecccfccSRichard Henderson    case INDEX_op_qemu_st8_a32_i32:
3774fecccfccSRichard Henderson        return C_O0_I2(s, L);
3775fecccfccSRichard Henderson    case INDEX_op_qemu_st8_a64_i32:
3776fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
37774c22e840SRichard Henderson
3778fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a32_i64:
3779fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
3780fecccfccSRichard Henderson    case INDEX_op_qemu_ld_a64_i64:
3781fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
37824c22e840SRichard Henderson
3783fecccfccSRichard Henderson    case INDEX_op_qemu_st_a32_i64:
3784fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
3785fecccfccSRichard Henderson    case INDEX_op_qemu_st_a64_i64:
3786fecccfccSRichard Henderson        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
3787139c1837SPaolo Bonzini
3788098d0fc1SRichard Henderson    case INDEX_op_qemu_ld_a32_i128:
3789098d0fc1SRichard Henderson    case INDEX_op_qemu_ld_a64_i128:
3790098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3791098d0fc1SRichard Henderson        return C_O2_I1(r, r, L);
3792098d0fc1SRichard Henderson    case INDEX_op_qemu_st_a32_i128:
3793098d0fc1SRichard Henderson    case INDEX_op_qemu_st_a64_i128:
3794098d0fc1SRichard Henderson        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3795098d0fc1SRichard Henderson        return C_O0_I3(L, L, L);
3796098d0fc1SRichard Henderson
3797139c1837SPaolo Bonzini    case INDEX_op_brcond2_i32:
37984c22e840SRichard Henderson        return C_O0_I4(r, r, ri, ri);
37994c22e840SRichard Henderson
3800139c1837SPaolo Bonzini    case INDEX_op_setcond2_i32:
38014c22e840SRichard Henderson        return C_O1_I4(r, r, r, ri, ri);
3802139c1837SPaolo Bonzini
3803139c1837SPaolo Bonzini    case INDEX_op_ld_vec:
3804139c1837SPaolo Bonzini    case INDEX_op_dupm_vec:
38054c22e840SRichard Henderson        return C_O1_I1(x, r);
38064c22e840SRichard Henderson
38074c22e840SRichard Henderson    case INDEX_op_st_vec:
38084c22e840SRichard Henderson        return C_O0_I2(x, r);
3809139c1837SPaolo Bonzini
3810139c1837SPaolo Bonzini    case INDEX_op_add_vec:
3811139c1837SPaolo Bonzini    case INDEX_op_sub_vec:
3812139c1837SPaolo Bonzini    case INDEX_op_mul_vec:
3813139c1837SPaolo Bonzini    case INDEX_op_and_vec:
3814139c1837SPaolo Bonzini    case INDEX_op_or_vec:
3815139c1837SPaolo Bonzini    case INDEX_op_xor_vec:
3816139c1837SPaolo Bonzini    case INDEX_op_andc_vec:
38173143767bSRichard Henderson    case INDEX_op_orc_vec:
38183143767bSRichard Henderson    case INDEX_op_nand_vec:
38193143767bSRichard Henderson    case INDEX_op_nor_vec:
38203143767bSRichard Henderson    case INDEX_op_eqv_vec:
3821139c1837SPaolo Bonzini    case INDEX_op_ssadd_vec:
3822139c1837SPaolo Bonzini    case INDEX_op_usadd_vec:
3823139c1837SPaolo Bonzini    case INDEX_op_sssub_vec:
3824139c1837SPaolo Bonzini    case INDEX_op_ussub_vec:
3825139c1837SPaolo Bonzini    case INDEX_op_smin_vec:
3826139c1837SPaolo Bonzini    case INDEX_op_umin_vec:
3827139c1837SPaolo Bonzini    case INDEX_op_smax_vec:
3828139c1837SPaolo Bonzini    case INDEX_op_umax_vec:
3829139c1837SPaolo Bonzini    case INDEX_op_shlv_vec:
3830139c1837SPaolo Bonzini    case INDEX_op_shrv_vec:
3831139c1837SPaolo Bonzini    case INDEX_op_sarv_vec:
3832102cd35cSRichard Henderson    case INDEX_op_rotlv_vec:
3833102cd35cSRichard Henderson    case INDEX_op_rotrv_vec:
3834139c1837SPaolo Bonzini    case INDEX_op_shls_vec:
3835139c1837SPaolo Bonzini    case INDEX_op_shrs_vec:
3836139c1837SPaolo Bonzini    case INDEX_op_sars_vec:
3837139c1837SPaolo Bonzini    case INDEX_op_cmp_vec:
3838139c1837SPaolo Bonzini    case INDEX_op_x86_shufps_vec:
3839139c1837SPaolo Bonzini    case INDEX_op_x86_blend_vec:
3840139c1837SPaolo Bonzini    case INDEX_op_x86_packss_vec:
3841139c1837SPaolo Bonzini    case INDEX_op_x86_packus_vec:
3842139c1837SPaolo Bonzini    case INDEX_op_x86_vperm2i128_vec:
3843139c1837SPaolo Bonzini    case INDEX_op_x86_punpckl_vec:
3844139c1837SPaolo Bonzini    case INDEX_op_x86_punpckh_vec:
3845965d5d06SRichard Henderson    case INDEX_op_x86_vpshldi_vec:
3846139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 32
3847139c1837SPaolo Bonzini    case INDEX_op_dup2_vec:
3848139c1837SPaolo Bonzini#endif
38494c22e840SRichard Henderson        return C_O1_I2(x, x, x);
38504c22e840SRichard Henderson
3851139c1837SPaolo Bonzini    case INDEX_op_abs_vec:
3852139c1837SPaolo Bonzini    case INDEX_op_dup_vec:
38533143767bSRichard Henderson    case INDEX_op_not_vec:
3854139c1837SPaolo Bonzini    case INDEX_op_shli_vec:
3855139c1837SPaolo Bonzini    case INDEX_op_shri_vec:
3856139c1837SPaolo Bonzini    case INDEX_op_sari_vec:
38574e73f842SRichard Henderson    case INDEX_op_rotli_vec:
3858139c1837SPaolo Bonzini    case INDEX_op_x86_psrldq_vec:
38594c22e840SRichard Henderson        return C_O1_I1(x, x);
38604c22e840SRichard Henderson
3861965d5d06SRichard Henderson    case INDEX_op_x86_vpshldv_vec:
3862965d5d06SRichard Henderson    case INDEX_op_x86_vpshrdv_vec:
3863965d5d06SRichard Henderson        return C_O1_I3(x, 0, x, x);
3864965d5d06SRichard Henderson
3865cf320769SRichard Henderson    case INDEX_op_bitsel_vec:
38664c22e840SRichard Henderson        return C_O1_I3(x, x, x, x);
3867db4121d2SRichard Henderson    case INDEX_op_cmpsel_vec:
3868d8387f0eSRichard Henderson        return C_O1_I4(x, x, x, xO, x);
3869139c1837SPaolo Bonzini
3870139c1837SPaolo Bonzini    default:
38714c22e840SRichard Henderson        g_assert_not_reached();
3872139c1837SPaolo Bonzini    }
3873139c1837SPaolo Bonzini}
3874139c1837SPaolo Bonzini
3875139c1837SPaolo Bonziniint tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3876139c1837SPaolo Bonzini{
3877139c1837SPaolo Bonzini    switch (opc) {
3878139c1837SPaolo Bonzini    case INDEX_op_add_vec:
3879139c1837SPaolo Bonzini    case INDEX_op_sub_vec:
3880139c1837SPaolo Bonzini    case INDEX_op_and_vec:
3881139c1837SPaolo Bonzini    case INDEX_op_or_vec:
3882139c1837SPaolo Bonzini    case INDEX_op_xor_vec:
3883139c1837SPaolo Bonzini    case INDEX_op_andc_vec:
38843143767bSRichard Henderson    case INDEX_op_orc_vec:
38853143767bSRichard Henderson    case INDEX_op_nand_vec:
38863143767bSRichard Henderson    case INDEX_op_nor_vec:
38873143767bSRichard Henderson    case INDEX_op_eqv_vec:
38883143767bSRichard Henderson    case INDEX_op_not_vec:
3889cf320769SRichard Henderson    case INDEX_op_bitsel_vec:
3890139c1837SPaolo Bonzini        return 1;
3891139c1837SPaolo Bonzini    case INDEX_op_cmp_vec:
3892139c1837SPaolo Bonzini    case INDEX_op_cmpsel_vec:
3893139c1837SPaolo Bonzini        return -1;
3894139c1837SPaolo Bonzini
38954e73f842SRichard Henderson    case INDEX_op_rotli_vec:
38964e73f842SRichard Henderson        return have_avx512vl && vece >= MO_32 ? 1 : -1;
38974e73f842SRichard Henderson
3898139c1837SPaolo Bonzini    case INDEX_op_shli_vec:
3899139c1837SPaolo Bonzini    case INDEX_op_shri_vec:
3900139c1837SPaolo Bonzini        /* We must expand the operation for MO_8.  */
3901139c1837SPaolo Bonzini        return vece == MO_8 ? -1 : 1;
3902139c1837SPaolo Bonzini
3903139c1837SPaolo Bonzini    case INDEX_op_sari_vec:
3904264e4182SRichard Henderson        switch (vece) {
3905264e4182SRichard Henderson        case MO_8:
3906139c1837SPaolo Bonzini            return -1;
3907264e4182SRichard Henderson        case MO_16:
3908264e4182SRichard Henderson        case MO_32:
3909264e4182SRichard Henderson            return 1;
3910264e4182SRichard Henderson        case MO_64:
3911264e4182SRichard Henderson            if (have_avx512vl) {
3912264e4182SRichard Henderson                return 1;
3913139c1837SPaolo Bonzini            }
3914264e4182SRichard Henderson            /*
3915264e4182SRichard Henderson             * We can emulate this for MO_64, but it does not pay off
3916264e4182SRichard Henderson             * unless we're producing at least 4 values.
3917264e4182SRichard Henderson             */
3918139c1837SPaolo Bonzini            return type >= TCG_TYPE_V256 ? -1 : 0;
3919139c1837SPaolo Bonzini        }
3920264e4182SRichard Henderson        return 0;
3921139c1837SPaolo Bonzini
3922139c1837SPaolo Bonzini    case INDEX_op_shls_vec:
3923139c1837SPaolo Bonzini    case INDEX_op_shrs_vec:
3924139c1837SPaolo Bonzini        return vece >= MO_16;
3925139c1837SPaolo Bonzini    case INDEX_op_sars_vec:
392647b331b2SRichard Henderson        switch (vece) {
392747b331b2SRichard Henderson        case MO_16:
392847b331b2SRichard Henderson        case MO_32:
392947b331b2SRichard Henderson            return 1;
393047b331b2SRichard Henderson        case MO_64:
393147b331b2SRichard Henderson            return have_avx512vl;
393247b331b2SRichard Henderson        }
393347b331b2SRichard Henderson        return 0;
3934139c1837SPaolo Bonzini    case INDEX_op_rotls_vec:
3935139c1837SPaolo Bonzini        return vece >= MO_16 ? -1 : 0;
3936139c1837SPaolo Bonzini
3937139c1837SPaolo Bonzini    case INDEX_op_shlv_vec:
3938139c1837SPaolo Bonzini    case INDEX_op_shrv_vec:
3939ef77ce0dSRichard Henderson        switch (vece) {
3940ef77ce0dSRichard Henderson        case MO_16:
3941ef77ce0dSRichard Henderson            return have_avx512bw;
3942ef77ce0dSRichard Henderson        case MO_32:
3943ef77ce0dSRichard Henderson        case MO_64:
3944ef77ce0dSRichard Henderson            return have_avx2;
3945ef77ce0dSRichard Henderson        }
3946ef77ce0dSRichard Henderson        return 0;
3947139c1837SPaolo Bonzini    case INDEX_op_sarv_vec:
3948ef77ce0dSRichard Henderson        switch (vece) {
3949ef77ce0dSRichard Henderson        case MO_16:
3950ef77ce0dSRichard Henderson            return have_avx512bw;
3951ef77ce0dSRichard Henderson        case MO_32:
3952ef77ce0dSRichard Henderson            return have_avx2;
3953ef77ce0dSRichard Henderson        case MO_64:
3954ef77ce0dSRichard Henderson            return have_avx512vl;
3955ef77ce0dSRichard Henderson        }
3956ef77ce0dSRichard Henderson        return 0;
3957139c1837SPaolo Bonzini    case INDEX_op_rotlv_vec:
3958139c1837SPaolo Bonzini    case INDEX_op_rotrv_vec:
3959102cd35cSRichard Henderson        switch (vece) {
3960786c7ef3SRichard Henderson        case MO_16:
3961786c7ef3SRichard Henderson            return have_avx512vbmi2 ? -1 : 0;
3962102cd35cSRichard Henderson        case MO_32:
3963102cd35cSRichard Henderson        case MO_64:
3964102cd35cSRichard Henderson            return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
3965102cd35cSRichard Henderson        }
3966102cd35cSRichard Henderson        return 0;
3967139c1837SPaolo Bonzini
3968139c1837SPaolo Bonzini    case INDEX_op_mul_vec:
39694c8b9686SRichard Henderson        switch (vece) {
39704c8b9686SRichard Henderson        case MO_8:
3971139c1837SPaolo Bonzini            return -1;
39724c8b9686SRichard Henderson        case MO_64:
39734c8b9686SRichard Henderson            return have_avx512dq;
3974139c1837SPaolo Bonzini        }
3975139c1837SPaolo Bonzini        return 1;
3976139c1837SPaolo Bonzini
3977139c1837SPaolo Bonzini    case INDEX_op_ssadd_vec:
3978139c1837SPaolo Bonzini    case INDEX_op_usadd_vec:
3979139c1837SPaolo Bonzini    case INDEX_op_sssub_vec:
3980139c1837SPaolo Bonzini    case INDEX_op_ussub_vec:
3981139c1837SPaolo Bonzini        return vece <= MO_16;
3982139c1837SPaolo Bonzini    case INDEX_op_smin_vec:
3983139c1837SPaolo Bonzini    case INDEX_op_smax_vec:
3984139c1837SPaolo Bonzini    case INDEX_op_umin_vec:
3985139c1837SPaolo Bonzini    case INDEX_op_umax_vec:
3986139c1837SPaolo Bonzini    case INDEX_op_abs_vec:
3987dac1648fSRichard Henderson        return vece <= MO_32 || have_avx512vl;
3988139c1837SPaolo Bonzini
3989139c1837SPaolo Bonzini    default:
3990139c1837SPaolo Bonzini        return 0;
3991139c1837SPaolo Bonzini    }
3992139c1837SPaolo Bonzini}
3993139c1837SPaolo Bonzini
39942623ca6aSRichard Hendersonstatic void expand_vec_shi(TCGType type, unsigned vece, bool right,
3995139c1837SPaolo Bonzini                           TCGv_vec v0, TCGv_vec v1, TCGArg imm)
3996139c1837SPaolo Bonzini{
39972623ca6aSRichard Henderson    uint8_t mask;
3998139c1837SPaolo Bonzini
3999139c1837SPaolo Bonzini    tcg_debug_assert(vece == MO_8);
40002623ca6aSRichard Henderson    if (right) {
40012623ca6aSRichard Henderson        mask = 0xff >> imm;
40022623ca6aSRichard Henderson        tcg_gen_shri_vec(MO_16, v0, v1, imm);
4003139c1837SPaolo Bonzini    } else {
40042623ca6aSRichard Henderson        mask = 0xff << imm;
40052623ca6aSRichard Henderson        tcg_gen_shli_vec(MO_16, v0, v1, imm);
4006139c1837SPaolo Bonzini    }
40072623ca6aSRichard Henderson    tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
4008139c1837SPaolo Bonzini}
4009139c1837SPaolo Bonzini
4010139c1837SPaolo Bonzinistatic void expand_vec_sari(TCGType type, unsigned vece,
4011139c1837SPaolo Bonzini                            TCGv_vec v0, TCGv_vec v1, TCGArg imm)
4012139c1837SPaolo Bonzini{
4013139c1837SPaolo Bonzini    TCGv_vec t1, t2;
4014139c1837SPaolo Bonzini
4015139c1837SPaolo Bonzini    switch (vece) {
4016139c1837SPaolo Bonzini    case MO_8:
40172623ca6aSRichard Henderson        /* Unpack to 16-bit, shift, and repack.  */
4018139c1837SPaolo Bonzini        t1 = tcg_temp_new_vec(type);
4019139c1837SPaolo Bonzini        t2 = tcg_temp_new_vec(type);
4020139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
4021139c1837SPaolo Bonzini                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
4022139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
4023139c1837SPaolo Bonzini                  tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
4024139c1837SPaolo Bonzini        tcg_gen_sari_vec(MO_16, t1, t1, imm + 8);
4025139c1837SPaolo Bonzini        tcg_gen_sari_vec(MO_16, t2, t2, imm + 8);
4026139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
4027139c1837SPaolo Bonzini                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4028139c1837SPaolo Bonzini        tcg_temp_free_vec(t1);
4029139c1837SPaolo Bonzini        tcg_temp_free_vec(t2);
4030139c1837SPaolo Bonzini        break;
4031139c1837SPaolo Bonzini
4032139c1837SPaolo Bonzini    case MO_64:
4033f6ff9c2fSRichard Henderson        t1 = tcg_temp_new_vec(type);
4034139c1837SPaolo Bonzini        if (imm <= 32) {
4035139c1837SPaolo Bonzini            /*
4036139c1837SPaolo Bonzini             * We can emulate a small sign extend by performing an arithmetic
4037139c1837SPaolo Bonzini             * 32-bit shift and overwriting the high half of a 64-bit logical
4038139c1837SPaolo Bonzini             * shift.  Note that the ISA says shift of 32 is valid, but TCG
4039139c1837SPaolo Bonzini             * does not, so we have to bound the smaller shift -- we get the
4040139c1837SPaolo Bonzini             * same result in the high half either way.
4041139c1837SPaolo Bonzini             */
4042139c1837SPaolo Bonzini            tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
4043139c1837SPaolo Bonzini            tcg_gen_shri_vec(MO_64, v0, v1, imm);
4044139c1837SPaolo Bonzini            vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
4045139c1837SPaolo Bonzini                      tcgv_vec_arg(v0), tcgv_vec_arg(v0),
4046139c1837SPaolo Bonzini                      tcgv_vec_arg(t1), 0xaa);
4047139c1837SPaolo Bonzini        } else {
4048139c1837SPaolo Bonzini            /* Otherwise we will need to use a compare vs 0 to produce
4049139c1837SPaolo Bonzini             * the sign-extend, shift and merge.
4050139c1837SPaolo Bonzini             */
4051f6ff9c2fSRichard Henderson            tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1,
4052f6ff9c2fSRichard Henderson                            tcg_constant_vec(type, MO_64, 0), v1);
4053139c1837SPaolo Bonzini            tcg_gen_shri_vec(MO_64, v0, v1, imm);
4054139c1837SPaolo Bonzini            tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
4055139c1837SPaolo Bonzini            tcg_gen_or_vec(MO_64, v0, v0, t1);
4056139c1837SPaolo Bonzini        }
4057f6ff9c2fSRichard Henderson        tcg_temp_free_vec(t1);
4058139c1837SPaolo Bonzini        break;
4059139c1837SPaolo Bonzini
4060139c1837SPaolo Bonzini    default:
4061139c1837SPaolo Bonzini        g_assert_not_reached();
4062139c1837SPaolo Bonzini    }
4063139c1837SPaolo Bonzini}
4064139c1837SPaolo Bonzini
4065139c1837SPaolo Bonzinistatic void expand_vec_rotli(TCGType type, unsigned vece,
4066139c1837SPaolo Bonzini                             TCGv_vec v0, TCGv_vec v1, TCGArg imm)
4067139c1837SPaolo Bonzini{
4068139c1837SPaolo Bonzini    TCGv_vec t;
4069139c1837SPaolo Bonzini
40702623ca6aSRichard Henderson    if (vece != MO_8 && have_avx512vbmi2) {
4071786c7ef3SRichard Henderson        vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
4072786c7ef3SRichard Henderson                  tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
4073786c7ef3SRichard Henderson        return;
4074786c7ef3SRichard Henderson    }
4075786c7ef3SRichard Henderson
4076139c1837SPaolo Bonzini    t = tcg_temp_new_vec(type);
4077139c1837SPaolo Bonzini    tcg_gen_shli_vec(vece, t, v1, imm);
4078139c1837SPaolo Bonzini    tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
4079139c1837SPaolo Bonzini    tcg_gen_or_vec(vece, v0, v0, t);
4080139c1837SPaolo Bonzini    tcg_temp_free_vec(t);
4081139c1837SPaolo Bonzini}
4082139c1837SPaolo Bonzini
4083139c1837SPaolo Bonzinistatic void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
4084139c1837SPaolo Bonzini                            TCGv_vec v1, TCGv_vec sh, bool right)
4085139c1837SPaolo Bonzini{
4086786c7ef3SRichard Henderson    TCGv_vec t;
4087139c1837SPaolo Bonzini
4088786c7ef3SRichard Henderson    if (have_avx512vbmi2) {
4089786c7ef3SRichard Henderson        vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
4090786c7ef3SRichard Henderson                  type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
4091786c7ef3SRichard Henderson                  tcgv_vec_arg(v1), tcgv_vec_arg(sh));
4092786c7ef3SRichard Henderson        return;
4093786c7ef3SRichard Henderson    }
4094786c7ef3SRichard Henderson
4095786c7ef3SRichard Henderson    t = tcg_temp_new_vec(type);
4096139c1837SPaolo Bonzini    tcg_gen_dupi_vec(vece, t, 8 << vece);
4097139c1837SPaolo Bonzini    tcg_gen_sub_vec(vece, t, t, sh);
4098139c1837SPaolo Bonzini    if (right) {
4099139c1837SPaolo Bonzini        tcg_gen_shlv_vec(vece, t, v1, t);
4100139c1837SPaolo Bonzini        tcg_gen_shrv_vec(vece, v0, v1, sh);
4101139c1837SPaolo Bonzini    } else {
4102139c1837SPaolo Bonzini        tcg_gen_shrv_vec(vece, t, v1, t);
4103139c1837SPaolo Bonzini        tcg_gen_shlv_vec(vece, v0, v1, sh);
4104139c1837SPaolo Bonzini    }
4105139c1837SPaolo Bonzini    tcg_gen_or_vec(vece, v0, v0, t);
4106139c1837SPaolo Bonzini    tcg_temp_free_vec(t);
4107139c1837SPaolo Bonzini}
4108139c1837SPaolo Bonzini
41091d442e42SRichard Hendersonstatic void expand_vec_rotls(TCGType type, unsigned vece,
41101d442e42SRichard Henderson                             TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
41111d442e42SRichard Henderson{
41121d442e42SRichard Henderson    TCGv_vec t = tcg_temp_new_vec(type);
41131d442e42SRichard Henderson
41141d442e42SRichard Henderson    tcg_debug_assert(vece != MO_8);
41151d442e42SRichard Henderson
41161d442e42SRichard Henderson    if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) {
41171d442e42SRichard Henderson        tcg_gen_dup_i32_vec(vece, t, lsh);
41181d442e42SRichard Henderson        if (vece >= MO_32) {
41191d442e42SRichard Henderson            tcg_gen_rotlv_vec(vece, v0, v1, t);
41201d442e42SRichard Henderson        } else {
41211d442e42SRichard Henderson            expand_vec_rotv(type, vece, v0, v1, t, false);
41221d442e42SRichard Henderson        }
41231d442e42SRichard Henderson    } else {
41241d442e42SRichard Henderson        TCGv_i32 rsh = tcg_temp_new_i32();
41251d442e42SRichard Henderson
41261d442e42SRichard Henderson        tcg_gen_neg_i32(rsh, lsh);
41271d442e42SRichard Henderson        tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
41281d442e42SRichard Henderson        tcg_gen_shls_vec(vece, t, v1, lsh);
41291d442e42SRichard Henderson        tcg_gen_shrs_vec(vece, v0, v1, rsh);
41301d442e42SRichard Henderson        tcg_gen_or_vec(vece, v0, v0, t);
41311d442e42SRichard Henderson
41321d442e42SRichard Henderson        tcg_temp_free_i32(rsh);
41331d442e42SRichard Henderson    }
41341d442e42SRichard Henderson
41351d442e42SRichard Henderson    tcg_temp_free_vec(t);
41361d442e42SRichard Henderson}
41371d442e42SRichard Henderson
4138139c1837SPaolo Bonzinistatic void expand_vec_mul(TCGType type, unsigned vece,
4139139c1837SPaolo Bonzini                           TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
4140139c1837SPaolo Bonzini{
41419739a052SRichard Henderson    TCGv_vec t1, t2, t3, t4, zero;
4142139c1837SPaolo Bonzini
4143139c1837SPaolo Bonzini    tcg_debug_assert(vece == MO_8);
4144139c1837SPaolo Bonzini
4145139c1837SPaolo Bonzini    /*
4146139c1837SPaolo Bonzini     * Unpack v1 bytes to words, 0 | x.
4147139c1837SPaolo Bonzini     * Unpack v2 bytes to words, y | 0.
4148139c1837SPaolo Bonzini     * This leaves the 8-bit result, x * y, with 8 bits of right padding.
4149139c1837SPaolo Bonzini     * Shift logical right by 8 bits to clear the high 8 bytes before
4150139c1837SPaolo Bonzini     * using an unsigned saturated pack.
4151139c1837SPaolo Bonzini     *
4152139c1837SPaolo Bonzini     * The difference between the V64, V128 and V256 cases is merely how
4153139c1837SPaolo Bonzini     * we distribute the expansion between temporaries.
4154139c1837SPaolo Bonzini     */
4155139c1837SPaolo Bonzini    switch (type) {
4156139c1837SPaolo Bonzini    case TCG_TYPE_V64:
4157139c1837SPaolo Bonzini        t1 = tcg_temp_new_vec(TCG_TYPE_V128);
4158139c1837SPaolo Bonzini        t2 = tcg_temp_new_vec(TCG_TYPE_V128);
41599739a052SRichard Henderson        zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
4160139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
41619739a052SRichard Henderson                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
4162139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
41639739a052SRichard Henderson                  tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
4164139c1837SPaolo Bonzini        tcg_gen_mul_vec(MO_16, t1, t1, t2);
4165139c1837SPaolo Bonzini        tcg_gen_shri_vec(MO_16, t1, t1, 8);
4166139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
4167139c1837SPaolo Bonzini                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t1));
4168139c1837SPaolo Bonzini        tcg_temp_free_vec(t1);
4169139c1837SPaolo Bonzini        tcg_temp_free_vec(t2);
4170139c1837SPaolo Bonzini        break;
4171139c1837SPaolo Bonzini
4172139c1837SPaolo Bonzini    case TCG_TYPE_V128:
4173139c1837SPaolo Bonzini    case TCG_TYPE_V256:
4174139c1837SPaolo Bonzini        t1 = tcg_temp_new_vec(type);
4175139c1837SPaolo Bonzini        t2 = tcg_temp_new_vec(type);
4176139c1837SPaolo Bonzini        t3 = tcg_temp_new_vec(type);
4177139c1837SPaolo Bonzini        t4 = tcg_temp_new_vec(type);
41789739a052SRichard Henderson        zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
4179139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
41809739a052SRichard Henderson                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
4181139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
41829739a052SRichard Henderson                  tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
4183139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
41849739a052SRichard Henderson                  tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
4185139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
41869739a052SRichard Henderson                  tcgv_vec_arg(t4), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
4187139c1837SPaolo Bonzini        tcg_gen_mul_vec(MO_16, t1, t1, t2);
4188139c1837SPaolo Bonzini        tcg_gen_mul_vec(MO_16, t3, t3, t4);
4189139c1837SPaolo Bonzini        tcg_gen_shri_vec(MO_16, t1, t1, 8);
4190139c1837SPaolo Bonzini        tcg_gen_shri_vec(MO_16, t3, t3, 8);
4191139c1837SPaolo Bonzini        vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
4192139c1837SPaolo Bonzini                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
4193139c1837SPaolo Bonzini        tcg_temp_free_vec(t1);
4194139c1837SPaolo Bonzini        tcg_temp_free_vec(t2);
4195139c1837SPaolo Bonzini        tcg_temp_free_vec(t3);
4196139c1837SPaolo Bonzini        tcg_temp_free_vec(t4);
4197139c1837SPaolo Bonzini        break;
4198139c1837SPaolo Bonzini
4199139c1837SPaolo Bonzini    default:
4200139c1837SPaolo Bonzini        g_assert_not_reached();
4201139c1837SPaolo Bonzini    }
4202139c1837SPaolo Bonzini}
4203139c1837SPaolo Bonzini
4204db4121d2SRichard Hendersonstatic TCGCond expand_vec_cond(TCGType type, unsigned vece,
4205db4121d2SRichard Henderson                               TCGArg *a1, TCGArg *a2, TCGCond cond)
4206139c1837SPaolo Bonzini{
4207b8a56703SRichard Henderson    /*
4208b8a56703SRichard Henderson     * Without AVX512, there are no 64-bit unsigned comparisons.
4209b8a56703SRichard Henderson     * We must bias the inputs so that they become signed.
4210b8a56703SRichard Henderson     * All other swapping and inversion are handled during code generation.
4211b8a56703SRichard Henderson     */
4212717da87dSRichard Henderson    if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) {
4213db4121d2SRichard Henderson        TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1));
4214db4121d2SRichard Henderson        TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2));
4215b8a56703SRichard Henderson        TCGv_vec t1 = tcg_temp_new_vec(type);
4216b8a56703SRichard Henderson        TCGv_vec t2 = tcg_temp_new_vec(type);
4217b8a56703SRichard Henderson        TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
4218139c1837SPaolo Bonzini
42199739a052SRichard Henderson        tcg_gen_sub_vec(vece, t1, v1, t3);
42209739a052SRichard Henderson        tcg_gen_sub_vec(vece, t2, v2, t3);
4221db4121d2SRichard Henderson        *a1 = tcgv_vec_arg(t1);
4222db4121d2SRichard Henderson        *a2 = tcgv_vec_arg(t2);
4223139c1837SPaolo Bonzini        cond = tcg_signed_cond(cond);
4224139c1837SPaolo Bonzini    }
4225db4121d2SRichard Henderson    return cond;
4226139c1837SPaolo Bonzini}
4227139c1837SPaolo Bonzini
4228db4121d2SRichard Hendersonstatic void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0,
4229db4121d2SRichard Henderson                           TCGArg a1, TCGArg a2, TCGCond cond)
4230139c1837SPaolo Bonzini{
4231db4121d2SRichard Henderson    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
4232db4121d2SRichard Henderson    /* Expand directly; do not recurse.  */
4233db4121d2SRichard Henderson    vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
4234db4121d2SRichard Henderson}
4235139c1837SPaolo Bonzini
4236db4121d2SRichard Hendersonstatic void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0,
4237db4121d2SRichard Henderson                              TCGArg a1, TCGArg a2,
4238db4121d2SRichard Henderson                              TCGArg a3, TCGArg a4, TCGCond cond)
4239db4121d2SRichard Henderson{
4240db4121d2SRichard Henderson    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
4241db4121d2SRichard Henderson    /* Expand directly; do not recurse.  */
4242db4121d2SRichard Henderson    vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond);
4243139c1837SPaolo Bonzini}
4244139c1837SPaolo Bonzini
4245139c1837SPaolo Bonzinivoid tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4246139c1837SPaolo Bonzini                       TCGArg a0, ...)
4247139c1837SPaolo Bonzini{
4248139c1837SPaolo Bonzini    va_list va;
4249db4121d2SRichard Henderson    TCGArg a1, a2, a3, a4, a5;
4250db4121d2SRichard Henderson    TCGv_vec v0, v1, v2;
4251139c1837SPaolo Bonzini
4252139c1837SPaolo Bonzini    va_start(va, a0);
4253db4121d2SRichard Henderson    a1 = va_arg(va, TCGArg);
4254139c1837SPaolo Bonzini    a2 = va_arg(va, TCGArg);
4255db4121d2SRichard Henderson    v0 = temp_tcgv_vec(arg_temp(a0));
4256db4121d2SRichard Henderson    v1 = temp_tcgv_vec(arg_temp(a1));
4257139c1837SPaolo Bonzini
4258139c1837SPaolo Bonzini    switch (opc) {
4259139c1837SPaolo Bonzini    case INDEX_op_shli_vec:
42602623ca6aSRichard Henderson        expand_vec_shi(type, vece, false, v0, v1, a2);
4261139c1837SPaolo Bonzini        break;
42622623ca6aSRichard Henderson    case INDEX_op_shri_vec:
42632623ca6aSRichard Henderson        expand_vec_shi(type, vece, true, v0, v1, a2);
42642623ca6aSRichard Henderson        break;
4265139c1837SPaolo Bonzini    case INDEX_op_sari_vec:
4266139c1837SPaolo Bonzini        expand_vec_sari(type, vece, v0, v1, a2);
4267139c1837SPaolo Bonzini        break;
4268139c1837SPaolo Bonzini
4269139c1837SPaolo Bonzini    case INDEX_op_rotli_vec:
4270139c1837SPaolo Bonzini        expand_vec_rotli(type, vece, v0, v1, a2);
4271139c1837SPaolo Bonzini        break;
4272139c1837SPaolo Bonzini
4273139c1837SPaolo Bonzini    case INDEX_op_rotls_vec:
4274139c1837SPaolo Bonzini        expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
4275139c1837SPaolo Bonzini        break;
4276139c1837SPaolo Bonzini
4277139c1837SPaolo Bonzini    case INDEX_op_rotlv_vec:
4278139c1837SPaolo Bonzini        v2 = temp_tcgv_vec(arg_temp(a2));
4279139c1837SPaolo Bonzini        expand_vec_rotv(type, vece, v0, v1, v2, false);
4280139c1837SPaolo Bonzini        break;
4281139c1837SPaolo Bonzini    case INDEX_op_rotrv_vec:
4282139c1837SPaolo Bonzini        v2 = temp_tcgv_vec(arg_temp(a2));
4283139c1837SPaolo Bonzini        expand_vec_rotv(type, vece, v0, v1, v2, true);
4284139c1837SPaolo Bonzini        break;
4285139c1837SPaolo Bonzini
4286139c1837SPaolo Bonzini    case INDEX_op_mul_vec:
4287139c1837SPaolo Bonzini        v2 = temp_tcgv_vec(arg_temp(a2));
4288139c1837SPaolo Bonzini        expand_vec_mul(type, vece, v0, v1, v2);
4289139c1837SPaolo Bonzini        break;
4290139c1837SPaolo Bonzini
4291139c1837SPaolo Bonzini    case INDEX_op_cmp_vec:
4292db4121d2SRichard Henderson        a3 = va_arg(va, TCGArg);
4293db4121d2SRichard Henderson        expand_vec_cmp(type, vece, a0, a1, a2, a3);
4294139c1837SPaolo Bonzini        break;
4295139c1837SPaolo Bonzini
4296139c1837SPaolo Bonzini    case INDEX_op_cmpsel_vec:
4297db4121d2SRichard Henderson        a3 = va_arg(va, TCGArg);
4298db4121d2SRichard Henderson        a4 = va_arg(va, TCGArg);
4299db4121d2SRichard Henderson        a5 = va_arg(va, TCGArg);
4300db4121d2SRichard Henderson        expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5);
4301139c1837SPaolo Bonzini        break;
4302139c1837SPaolo Bonzini
4303139c1837SPaolo Bonzini    default:
4304139c1837SPaolo Bonzini        break;
4305139c1837SPaolo Bonzini    }
4306139c1837SPaolo Bonzini
4307139c1837SPaolo Bonzini    va_end(va);
4308139c1837SPaolo Bonzini}
4309139c1837SPaolo Bonzini
4310139c1837SPaolo Bonzinistatic const int tcg_target_callee_save_regs[] = {
4311139c1837SPaolo Bonzini#if TCG_TARGET_REG_BITS == 64
4312139c1837SPaolo Bonzini    TCG_REG_RBP,
4313139c1837SPaolo Bonzini    TCG_REG_RBX,
4314139c1837SPaolo Bonzini#if defined(_WIN64)
4315139c1837SPaolo Bonzini    TCG_REG_RDI,
4316139c1837SPaolo Bonzini    TCG_REG_RSI,
4317139c1837SPaolo Bonzini#endif
4318139c1837SPaolo Bonzini    TCG_REG_R12,
4319139c1837SPaolo Bonzini    TCG_REG_R13,
4320139c1837SPaolo Bonzini    TCG_REG_R14, /* Currently used for the global env. */
4321139c1837SPaolo Bonzini    TCG_REG_R15,
4322139c1837SPaolo Bonzini#else
4323139c1837SPaolo Bonzini    TCG_REG_EBP, /* Currently used for the global env. */
4324139c1837SPaolo Bonzini    TCG_REG_EBX,
4325139c1837SPaolo Bonzini    TCG_REG_ESI,
4326139c1837SPaolo Bonzini    TCG_REG_EDI,
4327139c1837SPaolo Bonzini#endif
4328139c1837SPaolo Bonzini};
4329139c1837SPaolo Bonzini
/*
 * The frame geometry is expressed as macros so that the very same values
 * are used by tcg_target_qemu_prologue and by the unwind data handed to
 * tcg_register_jit.
 */

/* One machine word per saved register, plus one additional word. */
#define PUSH_SIZE \
    ((TCG_TARGET_REG_BITS / 8) \
     * (ARRAY_SIZE(tcg_target_callee_save_regs) + 1))

/*
 * PUSH_SIZE plus the static call-argument area plus the TCG temp buffer,
 * rounded up to a multiple of TCG_TARGET_STACK_ALIGN (the masking assumes
 * that alignment is a power of two).
 */
#define FRAME_SIZE \
    ((PUSH_SIZE + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + (TCG_TARGET_STACK_ALIGN - 1)) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
4343139c1837SPaolo Bonzini
4344139c1837SPaolo Bonzini/* Generate global QEMU prologue and epilogue code */
4345139c1837SPaolo Bonzinistatic void tcg_target_qemu_prologue(TCGContext *s)
4346139c1837SPaolo Bonzini{
4347139c1837SPaolo Bonzini    int i, stack_addend;
4348139c1837SPaolo Bonzini
4349139c1837SPaolo Bonzini    /* TB prologue */
4350139c1837SPaolo Bonzini
4351139c1837SPaolo Bonzini    /* Reserve some stack space, also for TCG temps.  */
4352139c1837SPaolo Bonzini    stack_addend = FRAME_SIZE - PUSH_SIZE;
4353139c1837SPaolo Bonzini    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
4354139c1837SPaolo Bonzini                  CPU_TEMP_BUF_NLONGS * sizeof(long));
4355139c1837SPaolo Bonzini
4356139c1837SPaolo Bonzini    /* Save all callee saved registers.  */
4357139c1837SPaolo Bonzini    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
4358139c1837SPaolo Bonzini        tcg_out_push(s, tcg_target_callee_save_regs[i]);
4359139c1837SPaolo Bonzini    }
4360139c1837SPaolo Bonzini
4361915e1d52SRichard Henderson    if (!tcg_use_softmmu && guest_base) {
4362915e1d52SRichard Henderson        int seg = setup_guest_base_seg();
4363915e1d52SRichard Henderson        if (seg != 0) {
4364915e1d52SRichard Henderson            x86_guest_base.seg = seg;
4365915e1d52SRichard Henderson        } else if (guest_base == (int32_t)guest_base) {
4366915e1d52SRichard Henderson            x86_guest_base.ofs = guest_base;
4367915e1d52SRichard Henderson        } else {
4368915e1d52SRichard Henderson            assert(TCG_TARGET_REG_BITS == 64);
4369915e1d52SRichard Henderson            /* Choose R12 because, as a base, it requires a SIB byte. */
4370915e1d52SRichard Henderson            x86_guest_base.index = TCG_REG_R12;
4371915e1d52SRichard Henderson            tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base);
4372915e1d52SRichard Henderson            tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
4373915e1d52SRichard Henderson        }
4374915e1d52SRichard Henderson    }
4375915e1d52SRichard Henderson
4376915e1d52SRichard Henderson    if (TCG_TARGET_REG_BITS == 32) {
4377139c1837SPaolo Bonzini        tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
4378139c1837SPaolo Bonzini                   (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
4379139c1837SPaolo Bonzini        tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4380139c1837SPaolo Bonzini        /* jmp *tb.  */
4381139c1837SPaolo Bonzini        tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
4382139c1837SPaolo Bonzini                             (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
4383139c1837SPaolo Bonzini                             + stack_addend);
4384139c1837SPaolo Bonzini    } else {
4385139c1837SPaolo Bonzini        tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
4386139c1837SPaolo Bonzini        tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4387139c1837SPaolo Bonzini        /* jmp *tb.  */
4388139c1837SPaolo Bonzini        tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
4389915e1d52SRichard Henderson    }
4390139c1837SPaolo Bonzini
4391139c1837SPaolo Bonzini    /*
4392139c1837SPaolo Bonzini     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
4393139c1837SPaolo Bonzini     * and fall through to the rest of the epilogue.
4394139c1837SPaolo Bonzini     */
4395c8bc1168SRichard Henderson    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
4396139c1837SPaolo Bonzini    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
4397139c1837SPaolo Bonzini
4398139c1837SPaolo Bonzini    /* TB epilogue */
4399705ed477SRichard Henderson    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4400139c1837SPaolo Bonzini
4401139c1837SPaolo Bonzini    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
4402139c1837SPaolo Bonzini
4403139c1837SPaolo Bonzini    if (have_avx2) {
4404139c1837SPaolo Bonzini        tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
4405139c1837SPaolo Bonzini    }
4406139c1837SPaolo Bonzini    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
4407139c1837SPaolo Bonzini        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
4408139c1837SPaolo Bonzini    }
4409139c1837SPaolo Bonzini    tcg_out_opc(s, OPC_RET, 0, 0, 0);
4410139c1837SPaolo Bonzini}
4411139c1837SPaolo Bonzini
44129358fbbfSRichard Hendersonstatic void tcg_out_tb_start(TCGContext *s)
44139358fbbfSRichard Henderson{
44149358fbbfSRichard Henderson    /* nothing to do */
44159358fbbfSRichard Henderson}
44169358fbbfSRichard Henderson
4417139c1837SPaolo Bonzinistatic void tcg_out_nop_fill(tcg_insn_unit *p, int count)
4418139c1837SPaolo Bonzini{
4419139c1837SPaolo Bonzini    memset(p, 0x90, count);
4420139c1837SPaolo Bonzini}
4421139c1837SPaolo Bonzini
4422139c1837SPaolo Bonzinistatic void tcg_target_init(TCGContext *s)
4423139c1837SPaolo Bonzini{
4424139c1837SPaolo Bonzini    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
4425139c1837SPaolo Bonzini    if (TCG_TARGET_REG_BITS == 64) {
4426139c1837SPaolo Bonzini        tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
4427139c1837SPaolo Bonzini    }
4428139c1837SPaolo Bonzini    if (have_avx1) {
4429139c1837SPaolo Bonzini        tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
4430139c1837SPaolo Bonzini        tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
4431139c1837SPaolo Bonzini    }
4432139c1837SPaolo Bonzini    if (have_avx2) {
4433139c1837SPaolo Bonzini        tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
4434139c1837SPaolo Bonzini    }
4435139c1837SPaolo Bonzini
4436139c1837SPaolo Bonzini    tcg_target_call_clobber_regs = ALL_VECTOR_REGS;
4437139c1837SPaolo Bonzini    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
4438139c1837SPaolo Bonzini    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
4439139c1837SPaolo Bonzini    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
4440139c1837SPaolo Bonzini    if (TCG_TARGET_REG_BITS == 64) {
4441139c1837SPaolo Bonzini#if !defined(_WIN64)
4442139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
4443139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
4444139c1837SPaolo Bonzini#endif
4445139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4446139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4447139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4448139c1837SPaolo Bonzini        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4449139c1837SPaolo Bonzini    }
4450139c1837SPaolo Bonzini
4451139c1837SPaolo Bonzini    s->reserved_regs = 0;
4452139c1837SPaolo Bonzini    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
4453098d0fc1SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
44546b258e74SRichard Henderson#ifdef _WIN64
44556b258e74SRichard Henderson    /* These are call saved, and we don't save them, so don't use them. */
44566b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
44576b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM7);
44586b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM8);
44596b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM9);
44606b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM10);
44616b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM11);
44626b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM12);
44636b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM13);
44646b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM14);
44656b258e74SRichard Henderson    tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM15);
44666b258e74SRichard Henderson#endif
4467139c1837SPaolo Bonzini}
4468139c1837SPaolo Bonzini
4469139c1837SPaolo Bonzinitypedef struct {
4470139c1837SPaolo Bonzini    DebugFrameHeader h;
4471139c1837SPaolo Bonzini    uint8_t fde_def_cfa[4];
4472139c1837SPaolo Bonzini    uint8_t fde_reg_ofs[14];
4473139c1837SPaolo Bonzini} DebugFrame;
4474139c1837SPaolo Bonzini
4475139c1837SPaolo Bonzini/* We're expecting a 2 byte uleb128 encoded value.  */
4476139c1837SPaolo BonziniQEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
4477139c1837SPaolo Bonzini
4478139c1837SPaolo Bonzini#if !defined(__ELF__)
4479139c1837SPaolo Bonzini    /* Host machine without ELF. */
4480139c1837SPaolo Bonzini#elif TCG_TARGET_REG_BITS == 64
4481139c1837SPaolo Bonzini#define ELF_HOST_MACHINE EM_X86_64
4482139c1837SPaolo Bonzinistatic const DebugFrame debug_frame = {
4483139c1837SPaolo Bonzini    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4484139c1837SPaolo Bonzini    .h.cie.id = -1,
4485139c1837SPaolo Bonzini    .h.cie.version = 1,
4486139c1837SPaolo Bonzini    .h.cie.code_align = 1,
4487139c1837SPaolo Bonzini    .h.cie.data_align = 0x78,             /* sleb128 -8 */
4488139c1837SPaolo Bonzini    .h.cie.return_column = 16,
4489139c1837SPaolo Bonzini
4490139c1837SPaolo Bonzini    /* Total FDE size does not include the "len" member.  */
4491139c1837SPaolo Bonzini    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4492139c1837SPaolo Bonzini
4493139c1837SPaolo Bonzini    .fde_def_cfa = {
4494139c1837SPaolo Bonzini        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
4495139c1837SPaolo Bonzini        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4496139c1837SPaolo Bonzini        (FRAME_SIZE >> 7)
4497139c1837SPaolo Bonzini    },
4498139c1837SPaolo Bonzini    .fde_reg_ofs = {
4499139c1837SPaolo Bonzini        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
4500139c1837SPaolo Bonzini        /* The following ordering must match tcg_target_callee_save_regs.  */
4501139c1837SPaolo Bonzini        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
4502139c1837SPaolo Bonzini        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
4503139c1837SPaolo Bonzini        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
4504139c1837SPaolo Bonzini        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
4505139c1837SPaolo Bonzini        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
4506139c1837SPaolo Bonzini        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
4507139c1837SPaolo Bonzini    }
4508139c1837SPaolo Bonzini};
4509139c1837SPaolo Bonzini#else
4510139c1837SPaolo Bonzini#define ELF_HOST_MACHINE EM_386
4511139c1837SPaolo Bonzinistatic const DebugFrame debug_frame = {
4512139c1837SPaolo Bonzini    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4513139c1837SPaolo Bonzini    .h.cie.id = -1,
4514139c1837SPaolo Bonzini    .h.cie.version = 1,
4515139c1837SPaolo Bonzini    .h.cie.code_align = 1,
4516139c1837SPaolo Bonzini    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
4517139c1837SPaolo Bonzini    .h.cie.return_column = 8,
4518139c1837SPaolo Bonzini
4519139c1837SPaolo Bonzini    /* Total FDE size does not include the "len" member.  */
4520139c1837SPaolo Bonzini    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4521139c1837SPaolo Bonzini
4522139c1837SPaolo Bonzini    .fde_def_cfa = {
4523139c1837SPaolo Bonzini        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
4524139c1837SPaolo Bonzini        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4525139c1837SPaolo Bonzini        (FRAME_SIZE >> 7)
4526139c1837SPaolo Bonzini    },
4527139c1837SPaolo Bonzini    .fde_reg_ofs = {
4528139c1837SPaolo Bonzini        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
4529139c1837SPaolo Bonzini        /* The following ordering must match tcg_target_callee_save_regs.  */
4530139c1837SPaolo Bonzini        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
4531139c1837SPaolo Bonzini        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
4532139c1837SPaolo Bonzini        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
4533139c1837SPaolo Bonzini        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
4534139c1837SPaolo Bonzini    }
4535139c1837SPaolo Bonzini};
4536139c1837SPaolo Bonzini#endif
4537139c1837SPaolo Bonzini
#ifdef ELF_HOST_MACHINE
/*
 * Register the code buffer BUF of BUF_SIZE bytes by forwarding it, together
 * with this backend's debug_frame table, to tcg_register_jit_int.
 * Only built when an ELF host machine was selected above.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof debug_frame);
}
#endif
4544