/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 SiFive, Inc
 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Based on i386/tcg-target.c and mips/tcg-target.c
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I128        TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "zero", "ra",  "sp",  "gp",  "tp",  "t0",  "t1",  "t2",
    "s0",   "s1",  "a0",  "a1",  "a2",  "a3",  "a4",  "a5",
    "a6",   "a7",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
    "s8",   "s9",  "s10", "s11", "t3",  "t4",  "t5",  "t6",
    "v0",   "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",   "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16",  "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24",  "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers */
    /* TCG_REG_S0 reserved for TCG_AREG0 */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_S10,
    TCG_REG_S11,

    /* Call clobbered registers */
    TCG_REG_T0,
    TCG_REG_T1,
    TCG_REG_T2,
    TCG_REG_T3,
    TCG_REG_T4,
    TCG_REG_T5,
    TCG_REG_T6,

    /* Argument registers */
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,

    /* Vector registers; TCG_REG_V0 is reserved as the mask register. */
    TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,  TCG_REG_V4,
    TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,  TCG_REG_V8,
    TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
    TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
    TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
    TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
    TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
    TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_A0 + slot;
}

#define TCG_CT_CONST_ZERO    0x100
#define TCG_CT_CONST_S12     0x200
#define TCG_CT_CONST_N12     0x400
#define TCG_CT_CONST_M12     0x800
#define TCG_CT_CONST_J12    0x1000
#define TCG_CT_CONST_S5     0x2000
#define TCG_CT_CONST_CMP_VI 0x4000

#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000
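
/*
 * Register-group masks: vector registers occupy bits 32..63 of the
 * constraint mask (see MAKE_64BIT_MASK above).  The 0x55..5 pattern in
 * the high word keeps every second vector register (v0, v2, v4, ...),
 * the legal base registers for LMUL=2 groups; the 0x11..1 pattern
 * keeps every fourth (v0, v4, v8, ...), the legal bases for LMUL=4.
 */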

#define sextreg  sextract64

/*
 * RISC-V Base ISA opcodes (IM)
 */

#define V_OPIVV (0x0 << 12)
#define V_OPFVV (0x1 << 12)
#define V_OPMVV (0x2 << 12)
#define V_OPIVI (0x3 << 12)
#define V_OPIVX (0x4 << 12)
#define V_OPFVF (0x5 << 12)
#define V_OPMVX (0x6 << 12)
#define V_OPCFG (0x7 << 12)

/* 0 <= NF <= 7 */
#define V_NF(x) ((x) << 29)
#define V_UNIT_STRIDE (0x0 << 20)
#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
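
/*
 * For the whole-register loads and stores below, the NF field holds
 * the number of registers minus one: VL2RE64_V uses V_NF(1),
 * VL8RE64_V uses V_NF(7), and so on.
 */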

typedef enum {
    VLMUL_M1 = 0, /* LMUL=1 */
    VLMUL_M2,     /* LMUL=2 */
    VLMUL_M4,     /* LMUL=4 */
    VLMUL_M8,     /* LMUL=8 */
    VLMUL_RESERVED,
    VLMUL_MF8,    /* LMUL=1/8 */
    VLMUL_MF4,    /* LMUL=1/4 */
    VLMUL_MF2,    /* LMUL=1/2 */
} RISCVVlmul;

typedef enum {
    OPC_ADD = 0x33,
    OPC_ADDI = 0x13,
    OPC_AND = 0x7033,
    OPC_ANDI = 0x7013,
    OPC_AUIPC = 0x17,
    OPC_BEQ = 0x63,
    OPC_BGE = 0x5063,
    OPC_BGEU = 0x7063,
    OPC_BLT = 0x4063,
    OPC_BLTU = 0x6063,
    OPC_BNE = 0x1063,
    OPC_DIV = 0x2004033,
    OPC_DIVU = 0x2005033,
    OPC_JAL = 0x6f,
    OPC_JALR = 0x67,
    OPC_LB = 0x3,
    OPC_LBU = 0x4003,
    OPC_LD = 0x3003,
    OPC_LH = 0x1003,
    OPC_LHU = 0x5003,
    OPC_LUI = 0x37,
    OPC_LW = 0x2003,
    OPC_LWU = 0x6003,
    OPC_MUL = 0x2000033,
    OPC_MULH = 0x2001033,
    OPC_MULHSU = 0x2002033,
    OPC_MULHU = 0x2003033,
    OPC_OR = 0x6033,
    OPC_ORI = 0x6013,
    OPC_REM = 0x2006033,
    OPC_REMU = 0x2007033,
    OPC_SB = 0x23,
    OPC_SD = 0x3023,
    OPC_SH = 0x1023,
    OPC_SLL = 0x1033,
    OPC_SLLI = 0x1013,
    OPC_SLT = 0x2033,
    OPC_SLTI = 0x2013,
    OPC_SLTIU = 0x3013,
    OPC_SLTU = 0x3033,
    OPC_SRA = 0x40005033,
    OPC_SRAI = 0x40005013,
    OPC_SRL = 0x5033,
    OPC_SRLI = 0x5013,
    OPC_SUB = 0x40000033,
    OPC_SW = 0x2023,
    OPC_XOR = 0x4033,
    OPC_XORI = 0x4013,

    OPC_ADDIW = 0x1b,
    OPC_ADDW = 0x3b,
    OPC_DIVUW = 0x200503b,
    OPC_DIVW = 0x200403b,
    OPC_MULW = 0x200003b,
    OPC_REMUW = 0x200703b,
    OPC_REMW = 0x200603b,
    OPC_SLLIW = 0x101b,
    OPC_SLLW = 0x103b,
    OPC_SRAIW = 0x4000501b,
    OPC_SRAW = 0x4000503b,
    OPC_SRLIW = 0x501b,
    OPC_SRLW = 0x503b,
    OPC_SUBW = 0x4000003b,

    OPC_FENCE = 0x0000000f,
    OPC_NOP   = OPC_ADDI,   /* nop = addi zero, zero, 0 */

    /* Zba: Bit manipulation extension, address generation */
    OPC_ADD_UW = 0x0800003b,

    /* Zbb: Bit manipulation extension, basic bit manipulation */
    OPC_ANDN   = 0x40007033,
    OPC_CLZ    = 0x60001013,
    OPC_CLZW   = 0x6000101b,
    OPC_CPOP   = 0x60201013,
    OPC_CPOPW  = 0x6020101b,
    OPC_CTZ    = 0x60101013,
    OPC_CTZW   = 0x6010101b,
    OPC_ORN    = 0x40006033,
    OPC_REV8   = 0x6b805013,
    OPC_ROL    = 0x60001033,
    OPC_ROLW   = 0x6000103b,
    OPC_ROR    = 0x60005033,
    OPC_RORW   = 0x6000503b,
    OPC_RORI   = 0x60005013,
    OPC_RORIW  = 0x6000501b,
    OPC_SEXT_B = 0x60401013,
    OPC_SEXT_H = 0x60501013,
    OPC_XNOR   = 0x40004033,
    OPC_ZEXT_H = 0x0800403b,

    /* Zicond: integer conditional operations */
    OPC_CZERO_EQZ = 0x0e005033,
    OPC_CZERO_NEZ = 0x0e007033,

    /* V: Vector extension 1.0 */
    OPC_VSETVLI  = 0x57 | V_OPCFG,
    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
    OPC_VSETVL   = 0x80000057 | V_OPCFG,

    OPC_VLE8_V  = 0x7 | V_UNIT_STRIDE,
    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
    OPC_VSE8_V  = 0x27 | V_UNIT_STRIDE,
    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,

    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,

    OPC_VADD_VV = 0x57 | V_OPIVV,
    OPC_VADD_VI = 0x57 | V_OPIVI,
    OPC_VSUB_VV = 0x8000057 | V_OPIVV,
    OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
    OPC_VAND_VV = 0x24000057 | V_OPIVV,
    OPC_VAND_VI = 0x24000057 | V_OPIVI,
    OPC_VOR_VV = 0x28000057 | V_OPIVV,
    OPC_VOR_VI = 0x28000057 | V_OPIVI,
    OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
    OPC_VXOR_VI = 0x2c000057 | V_OPIVI,

    OPC_VMUL_VV = 0x94000057 | V_OPMVV,
    OPC_VSADD_VV = 0x84000057 | V_OPIVV,
    OPC_VSADD_VI = 0x84000057 | V_OPIVI,
    OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
    OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
    OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
    OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
    OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
    OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,

    OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
    OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
    OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
    OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
    OPC_VMIN_VV = 0x14000057 | V_OPIVV,
    OPC_VMIN_VI = 0x14000057 | V_OPIVI,
    OPC_VMINU_VV = 0x10000057 | V_OPIVV,
    OPC_VMINU_VI = 0x10000057 | V_OPIVI,

    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,

    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,

    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,

    OPC_VSLL_VV = 0x94000057 | V_OPIVV,
    OPC_VSLL_VI = 0x94000057 | V_OPIVI,
    OPC_VSLL_VX = 0x94000057 | V_OPIVX,
    OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
    OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
    OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
    OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
    OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
    OPC_VSRA_VX = 0xa4000057 | V_OPIVX,

    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,

    OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;

static const struct {
    RISCVInsn op;
    bool swap;
} tcg_cmpcond_to_rvv_vv[] = {
    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false },
};

static const struct {
    RISCVInsn op;
    int min;
    int max;
    bool adjust;
} tcg_cmpcond_to_rvv_vi[] = {
    [TCG_COND_EQ]  = { OPC_VMSEQ_VI,  -16, 15, false },
    [TCG_COND_NE]  = { OPC_VMSNE_VI,  -16, 15, false },
    [TCG_COND_GT]  = { OPC_VMSGT_VI,  -16, 15, false },
    [TCG_COND_LE]  = { OPC_VMSLE_VI,  -16, 15, false },
    [TCG_COND_LT]  = { OPC_VMSLE_VI,  -15, 16, true  },
    [TCG_COND_GE]  = { OPC_VMSGT_VI,  -15, 16, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VI,   0, 15, false },
    [TCG_COND_GTU] = { OPC_VMSGTU_VI,   0, 15, false },
    [TCG_COND_LTU] = { OPC_VMSLEU_VI,   1, 16, true  },
    [TCG_COND_GEU] = { OPC_VMSGTU_VI,   1, 16, true  },
};
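
/*
 * The "adjust" entries implement conditions that have no direct
 * immediate form by shifting the constant by one:
 *   x < c  is  x <= c - 1,   and   x >= c  is  x > c - 1
 * hence their min/max bounds are offset by one relative to the plain
 * 5-bit immediate range, and the emitter subtracts "adjust" from the
 * constant (see tcg_out_cmpsel below).
 */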

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return true;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return true;
    }
    if (type >= TCG_TYPE_V64) {
        /* Val is replicated by VECE; extract the highest element. */
        val >>= (-8 << vece) & 63;
    }
    /*
     * Sign extended from 12 bits: [-0x800, 0x7ff].
     * Used for most arithmetic, as this is the ISA field.
     */
    if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
        return true;
    }
    /*
     * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
     * Used for subtraction, where a constant must be handled by ADDI.
     */
    if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
        return true;
    }
    /*
     * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
     * Used by addsub2 and movcond, which may need the negative value,
     * and requires the modified constant to be representable.
     */
    if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
        return true;
    }
    /*
     * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
     * Used to map ANDN back to ANDI, etc.
     */
    if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
        return true;
    }
    /*
     * Sign extended from 5 bits: [-0x10, 0x0f].
     * Used for vector-immediate.
     */
    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
        return true;
    }
    /*
     * Used for vector compare OPIVI instructions.
     */
    if ((ct & TCG_CT_CONST_CMP_VI) &&
        val >= tcg_cmpcond_to_rvv_vi[cond].min &&
        val <= tcg_cmpcond_to_rvv_vi[cond].max) {
        return true;
    }
    return false;
}

/*
 * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
 */

/* Type-R */

static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
}

/* Type-I */

static int32_t encode_imm12(uint32_t imm)
{
    return (imm & 0xfff) << 20;
}

static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
}

/* Type-S */

static int32_t encode_simm12(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0xfe0) << 20;
    ret |= (imm & 0x1f) << 7;

    return ret;
}

static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
}

/* Type-SB */

static int32_t encode_sbimm12(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0x1000) << 19;
    ret |= (imm & 0x7e0) << 20;
    ret |= (imm & 0x1e) << 7;
    ret |= (imm & 0x800) >> 4;

    return ret;
}

static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
}
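
/*
 * Worked example of the SB-type scatter above: branch offset bit 12
 * lands in instruction bit 31, bits 10:5 in bits 30:25, bits 4:1 in
 * bits 11:8, and bit 11 in bit 7.  Bit 0 of the offset is always zero
 * and is not encoded.
 */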

/* Type-U */

static int32_t encode_uimm20(uint32_t imm)
{
    return imm & 0xfffff000;
}

static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
}

/* Type-UJ */

static int32_t encode_ujimm20(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0x0007fe) << (21 - 1);
    ret |= (imm & 0x000800) << (20 - 11);
    ret |= (imm & 0x0ff000) << (12 - 12);
    ret |= (imm & 0x100000) << (31 - 20);

    return ret;
}

static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
}
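
/*
 * Worked example of the UJ-type scatter above: jump offset bits 10:1
 * go to instruction bits 30:21, offset bit 11 to bit 20, bits 19:12
 * stay in place, and the sign (bit 20) goes to bit 31.  So an offset
 * of +0x1000 (bit 12 set) sets only instruction bit 12.
 */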

/* Type-OPIVI */

static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
                         TCGReg vs2, bool vm)
{
    return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
           (vs2 & 0x1f) << 20 | (vm << 25);
}

/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */

static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
                        TCGReg s2, bool vm)
{
    return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
           (s2 & 0x1f) << 20 | (vm << 25);
}

/* Vector vtype */

static uint32_t encode_vtype(bool vta, bool vma,
                             MemOp vsew, RISCVVlmul vlmul)
{
    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
}
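
/*
 * Example: encode_vtype(true, true, MO_64, VLMUL_M1) yields 0xd8,
 * i.e. tail-agnostic, mask-agnostic, SEW=64, LMUL=1, matching the
 * vtype operand layout (vlmul[2:0], vsew[5:3], vta, vma).
 */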

static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
                           TCGArg rs1, uint32_t vtype)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
}

static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
                            uint32_t uimm, uint32_t vtype)
{
    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
}

/*
 * RISC-V instruction emitters
 */

static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
                            TCGReg rd, TCGReg rs1, TCGReg rs2)
{
    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
}

static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
                            TCGReg rd, TCGReg rs1, TCGArg imm)
{
    tcg_out32(s, encode_i(opc, rd, rs1, imm));
}

static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
                              TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
}

static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
                               TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
}

static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
                              TCGReg rd, uint32_t imm)
{
    tcg_out32(s, encode_u(opc, rd, imm));
}

static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
                             TCGReg rd, uint32_t imm)
{
    tcg_out32(s, encode_uj(opc, rd, imm));
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = OPC_NOP;
    }
}

/*
 * Relocations
 */

static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 12)) {
        *src_rw |= encode_sbimm12(offset);
        return true;
    }

    return false;
}

static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 20)) {
        *src_rw |= encode_ujimm20(offset);
        return true;
    }

    return false;
}

static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
    int32_t lo = sextreg(offset, 0, 12);
    int32_t hi = offset - lo;

    if (offset == hi + lo) {
        src_rw[0] |= encode_uimm20(hi);
        src_rw[1] |= encode_imm12(lo);
        return true;
    }

    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_RISCV_BRANCH:
        return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
    case R_RISCV_JAL:
        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
    case R_RISCV_CALL:
        return reloc_call(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

/*
 * RISC-V vector instruction emitters
 */

/*
 * Vector registers use the same low 5 bits as GPR registers, and
 * vm=0 (vm = false) means vector masking is ENABLED.  With RVV 1.0,
 * vs2 is the first operand, while rs1/imm is the second operand.
 */
static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg vs1)
{
    tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
}

static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg rs1)
{
    tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
}

static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, int32_t imm)
{
    tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
}

static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
                              TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
{
    if (c_vi1) {
        tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
    } else {
        tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
    }
}

static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
                                 TCGReg vs2, int32_t imm)
{
    tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
}

static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
                                 TCGReg vs2, TCGReg vs1)
{
    tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
}

typedef struct VsetCache {
    uint32_t movi_insn;
    uint32_t vset_insn;
} VsetCache;

static VsetCache riscv_vset_cache[3][4];
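
/*
 * The cache is indexed by [type - TCG_TYPE_V64][vsew] and holds a
 * pre-encoded instruction pair for each (vector type, element size)
 * combination: an optional movi to materialize the AVL, plus the
 * vsetvli/vsetivli itself.  The entries are presumably filled in
 * during target initialization, outside this excerpt.
 */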

static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
{
    const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];

    s->riscv_cur_type = type;
    s->riscv_cur_vsew = vsew;

    if (p->movi_insn) {
        tcg_out32(s, p->movi_insn);
    }
    tcg_out32(s, p->vset_insn);
}

static MemOp set_vtype_len(TCGContext *s, TCGType type)
{
    if (type != s->riscv_cur_type) {
        set_vtype(s, type, MO_64);
    }
    return s->riscv_cur_vsew;
}

static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
{
    if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
        set_vtype(s, type, vsew);
    }
}

/*
 * TCG intrinsics
 */

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        {
            int lmul = type - riscv_lg2_vlenb;
            int nf = 1 << MAX(lmul, 0);
            tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long val)
{
    tcg_target_long lo, hi, tmp;
    int shift, ret;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    lo = sextreg(val, 0, 12);
    if (val == lo) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
        return;
    }

    hi = val - lo;
    if (val == (int32_t)val) {
        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
        if (lo != 0) {
            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
        }
        return;
    }

    tmp = tcg_pcrel_diff(s, (void *)val);
    if (tmp == (int32_t)tmp) {
        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
        ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
        tcg_debug_assert(ret == true);
        return;
    }

    /* Look for a single 20-bit section.  */
    shift = ctz64(val);
    tmp = val >> shift;
    if (tmp == sextreg(tmp, 0, 20)) {
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
        if (shift > 12) {
            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
        }
        return;
    }

    /* Look for a few high zero bits, with lots of bits set in the middle.  */
    shift = clz64(val);
    tmp = val << shift;
    if (tmp == sextreg(tmp, 12, 20) << 12) {
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    } else if (tmp == sextreg(tmp, 0, 12)) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    }

    /* Drop into the constant pool.  */
    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
}
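
/*
 * Examples of the cascade above: 0x12345678 becomes
 *   lui  rd, 0x12345000; addiw rd, rd, 0x678
 * (lo = 0x678, hi = val - lo), while 0x12345800 becomes
 *   lui  rd, 0x12346000; addiw rd, rd, -0x800
 * because the low 12 bits sign-extend.  Values with a single 20-bit
 * run of significant bits use LUI plus one shift, and anything else
 * falls through to the constant pool.
 */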

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
        tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
    }
}

static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBA) {
        tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO);
    } else {
        tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
        tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
    }
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
    }
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
    }
}

static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_ext32s(s, ret, arg);
    }
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32u(s, ret, arg);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32s(s, ret, arg);
}

static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                         TCGReg addr, intptr_t offset)
{
    intptr_t imm12 = sextreg(offset, 0, 12);

    if (offset != imm12) {
        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);

        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
            imm12 = sextreg(diff, 0, 12);
            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
            if (addr != TCG_REG_ZERO) {
                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
            }
        }
        addr = TCG_REG_TMP2;
    }

    switch (opc) {
    case OPC_SB:
    case OPC_SH:
    case OPC_SW:
    case OPC_SD:
        tcg_out_opc_store(s, opc, addr, data, imm12);
        break;
    case OPC_LB:
    case OPC_LBU:
    case OPC_LH:
    case OPC_LHU:
    case OPC_LW:
    case OPC_LWU:
    case OPC_LD:
        tcg_out_opc_imm(s, opc, data, addr, imm12);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        addr = TCG_REG_TMP0;
    }
    tcg_out32(s, encode_v(opc, data, addr, 0, true));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    RISCVInsn insn;

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;

            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
            insn = whole_reg_ld[idx];
        } else {
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    RISCVInsn insn;

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_st[] = {
                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
            };
            unsigned idx = type - riscv_lg2_vlenb;

            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
            insn = whole_reg_st[idx];
        } else {
            static const RISCVInsn unit_stride_st[] = {
                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
            insn = unit_stride_st[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
        return true;
    }
    return false;
}

static void tcg_out_addsub2(TCGContext *s,
                            TCGReg rl, TCGReg rh,
                            TCGReg al, TCGReg ah,
                            TCGArg bl, TCGArg bh,
                            bool cbl, bool cbh, bool is_sub, bool is32bit)
{
    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
    TCGReg th = TCG_REG_TMP1;

    /* If we have a negative constant such that negating it would
       make the high part zero, we can (usually) eliminate one insn.  */
    if (cbl && cbh && bh == -1 && bl != 0) {
        bl = -bl;
        bh = 0;
        is_sub = !is_sub;
    }

    /* By operating on the high part first, we get to use the final
       carry operation to move back from the temporary.  */
    if (!cbh) {
        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
    } else if (bh != 0 || ah == rl) {
        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
    } else {
        th = ah;
    }

    /* Note that tcg optimization should eliminate the bl == 0 case.  */
    if (is_sub) {
        if (cbl) {
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
        }
        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
    } else {
        if (cbl) {
            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
        } else if (al == bl) {
            /*
             * If the input regs overlap, this is a simple doubling
             * and carry-out is the input msb.  This special case is
             * required when the output reg overlaps the input,
             * but we might as well use it always.
             */
            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
            tcg_out_opc_reg(s, opc_add, rl, al, al);
        } else {
            tcg_out_opc_reg(s, opc_add, rl, al, bl);
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
                            rl, (rl == bl ? al : bl));
        }
        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    set_vtype_len_sew(s, type, vece);
    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset)
{
    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg)
{
    /* Arg is replicated by VECE; extract the highest element. */
    arg >>= (-8 << vece) & 63;

    if (arg >= -16 && arg < 16) {
        if (arg == 0 || arg == -1) {
            set_vtype_len(s, type);
        } else {
            set_vtype_len_sew(s, type, vece);
        }
        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
        return;
    }
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

static const struct {
    RISCVInsn op;
    bool swap;
} tcg_brcond_to_riscv[] = {
    [TCG_COND_EQ] =  { OPC_BEQ,  false },
    [TCG_COND_NE] =  { OPC_BNE,  false },
    [TCG_COND_LT] =  { OPC_BLT,  false },
    [TCG_COND_GE] =  { OPC_BGE,  false },
    [TCG_COND_LE] =  { OPC_BGE,  true  },
    [TCG_COND_GT] =  { OPC_BLT,  true  },
    [TCG_COND_LTU] = { OPC_BLTU, false },
    [TCG_COND_GEU] = { OPC_BGEU, false },
    [TCG_COND_LEU] = { OPC_BGEU, true  },
    [TCG_COND_GTU] = { OPC_BLTU, true  }
};

static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                           TCGReg arg2, TCGLabel *l)
{
    RISCVInsn op = tcg_brcond_to_riscv[cond].op;

    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        TCGReg t = arg1;
        arg1 = arg2;
        arg2 = t;
    }

    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
    tcg_out_opc_branch(s, op, arg1, arg2, 0);
}

#define SETCOND_INV    TCG_TARGET_NB_REGS
#define SETCOND_NEZ    (SETCOND_INV << 1)
#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
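
/*
 * tcg_out_setcond_int returns the register holding an intermediate
 * result, with these flag bits OR'ed in: SETCOND_INV means the value
 * is logically inverted, and SETCOND_NEZ means it is zero/non-zero
 * rather than a clean 0/1 boolean.  Callers strip the flags with
 * ~SETCOND_FLAGS and fix up the value as needed.
 */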

static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int flags = 0;

    switch (cond) {
    case TCG_COND_EQ:    /* -> NE  */
    case TCG_COND_GE:    /* -> LT  */
    case TCG_COND_GEU:   /* -> LTU */
    case TCG_COND_GT:    /* -> LE  */
    case TCG_COND_GTU:   /* -> LEU */
        cond = tcg_invert_cond(cond);
        flags ^= SETCOND_INV;
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /*
         * If we have a constant input, the most efficient way to implement
         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
         * We don't need to care for this for LE because the constant input
         * is constrained to signed 12-bit, and 0x800 is representable in the
         * temporary register.
         */
        if (c2) {
            if (cond == TCG_COND_LEU) {
                /* unsigned <= -1 is true */
                if (arg2 == -1) {
                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
                    return ret;
                }
                cond = TCG_COND_LTU;
            } else {
                cond = TCG_COND_LT;
            }
            tcg_debug_assert(arg2 <= 0x7ff);
            if (++arg2 == 0x800) {
                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
                arg2 = TCG_REG_TMP0;
                c2 = false;
            }
        } else {
            TCGReg tmp = arg2;
            arg2 = arg1;
            arg1 = tmp;
            cond = tcg_swap_cond(cond);    /* LE -> GE */
            cond = tcg_invert_cond(cond);  /* GE -> LT */
            flags ^= SETCOND_INV;
        }
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_NE:
        flags |= SETCOND_NEZ;
        if (!c2) {
            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
        } else if (arg2 == 0) {
            ret = arg1;
        } else {
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LT:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LTU:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
        }
        break;

    default:
        g_assert_not_reached();
    }

    return ret | flags;
}

static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);

    if (tmpflags != ret) {
        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;

        switch (tmpflags & SETCOND_FLAGS) {
        case SETCOND_INV:
            /* Intermediate result is boolean: simply invert. */
            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
            break;
        case SETCOND_NEZ:
            /* Intermediate result is zero/non-zero: test != 0. */
            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
            break;
        case SETCOND_NEZ | SETCOND_INV:
            /* Intermediate result is zero/non-zero: test == 0. */
            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags;
    TCGReg tmp;

    /* For LT/GE comparison against 0, replicate the sign bit. */
    if (c2 && arg2 == 0) {
        switch (cond) {
        case TCG_COND_GE:
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
            arg1 = ret;
            /* fall through */
        case TCG_COND_LT:
            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
            return;
        default:
            break;
        }
    }

    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
    tmp = tmpflags & ~SETCOND_FLAGS;

    /* If intermediate result is zero/non-zero: test != 0. */
    if (tmpflags & SETCOND_NEZ) {
        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
        tmp = ret;
    }

    /* Produce the 0/-1 result. */
    if (tmpflags & SETCOND_INV) {
        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
    } else {
        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
    }
}

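/*
 * With Zicond, czero.eqz rd, rs1, rs2 yields rs1 if rs2 != 0, else 0,
 * and czero.nez yields rs1 if rs2 == 0, else 0.  The constant cases
 * below build on the identity
 *   ret = (test ? val1 - val2 : 0) + val2
 * so that only one temporary and one addi are needed.
 */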
static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
                                   int val1, bool c_val1,
                                   int val2, bool c_val2)
{
    if (val1 == 0) {
        if (c_val2) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
            val2 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
        return;
    }

    if (val2 == 0) {
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
            val1 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
        return;
    }

    if (c_val2) {
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
        } else {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
        return;
    }

    if (c_val1) {
        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
        return;
    }

    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
}

static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val, bool c_val)
{
    RISCVInsn op;
    int disp = 8;

    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
    op = tcg_brcond_to_riscv[cond].op;
    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
    } else {
        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
    }
    if (c_val) {
        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
    } else {
        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
    }
}

static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val1, bool c_val1,
                                int val2, bool c_val2)
{
    TCGReg tmp;

    /* TCG optimizer reorders to prefer ret matching val2. */
    if (!c_val2 && ret == val2) {
        cond = tcg_invert_cond(cond);
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
        return;
    }

    if (!c_val1 && ret == val1) {
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
        return;
    }

    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
    if (c_val1) {
        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
    }
    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
}

static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg cmp1, int cmp2, bool c_cmp2,
                            TCGReg val1, bool c_val1,
                            TCGReg val2, bool c_val2)
{
    int tmpflags;
    TCGReg t;

    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
                            val1, c_val1, val2, c_val2);
        return;
    }

    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
    t = tmpflags & ~SETCOND_FLAGS;

    if (cpuinfo & CPUINFO_ZICOND) {
        if (tmpflags & SETCOND_INV) {
            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
        } else {
            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
        }
    } else {
        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
                            val1, c_val1, val2, c_val2);
    }
}

static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
{
    tcg_out_opc_imm(s, insn, ret, src1, 0);

    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
        /*
         * The requested zero result does not match the insn, so adjust.
         * Note that constraints put 'ret' in a new register, so the
         * computation above did not clobber either 'src1' or 'src2'.
         */
        tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true,
                        src2, c_src2, ret, false);
    }
}

static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
                           TCGCond cond, TCGReg ret,
                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
                           TCGReg val1, bool c_val1,
                           TCGReg val2, bool c_val2)
{
    set_vtype_len_sew(s, type, vece);

    /* Use only vmerge_vim if possible, by inverting the test. */
    if (c_val2 && !c_val1) {
        TCGArg temp = val1;
        cond = tcg_invert_cond(cond);
        val1 = val2;
        val2 = temp;
        c_val1 = true;
        c_val2 = false;
    }

    /* Perform the comparison into V0 mask. */
    if (c_cmp2) {
        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp2, cmp1);
    } else {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp1, cmp2);
    }
    if (c_val1) {
        if (c_val2) {
            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
            val2 = ret;
        }
        /* vd[i] = v0.mask[i] ? imm : vs2[i] */
        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
    } else {
        /* vd[i] = v0.mask[i] ? vs1[i] : vs2[i] */
        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
    }
}

static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
                            TCGReg dst, TCGReg src, unsigned imm)
{
    if (imm < 32) {
        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
    }
}

static void init_setting_vtype(TCGContext *s)
{
    s->riscv_cur_type = TCG_TYPE_COUNT;
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
    int ret;

    init_setting_vtype(s);

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 20)) {
        /* short jump: fits in a signed 20-bit offset, +-512 KiB */
        tcg_out_opc_jump(s, OPC_JAL, link, offset);
    } else if (offset == (int32_t)offset) {
        /* long jump: fits in a signed 32-bit offset, +-2 GiB */
        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
        ret = reloc_call(s->code_ptr - 2, arg);
        tcg_debug_assert(ret == true);
    } else {
        /* far jump: 64-bit */
        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
        tcg_target_long base = (tcg_target_long)arg - imm;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, arg, false);
}

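/*
 * FENCE encodes the predecessor and successor sets in bits 27:24 and
 * 23:20 (PI,PO,PR,PW / SI,SO,SR,SW).  The constants below correspond
 * to fence r,r / fence w,r / fence r,w / fence w,w respectively.
 */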
static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    tcg_insn_unit insn = OPC_FENCE;

    if (a0 & TCG_MO_LD_LD) {
        insn |= 0x02200000;
    }
    if (a0 & TCG_MO_ST_LD) {
        insn |= 0x01200000;
    }
    if (a0 & TCG_MO_LD_ST) {
        insn |= 0x02100000;
    }
    if (a0 & TCG_MO_ST_ST) {
        insn |= 0x01100000;
    }
    tcg_out32(s, insn);
}

/*
 * Load/store and TLB
 */

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
    bool ok = reloc_jimm20(s->code_ptr - 1, target);
    tcg_debug_assert(ok);
}

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

/* We have three temps; we might as well expose them. */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
};
1661
1662static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1663{
1664    MemOp opc = get_memop(l->oi);
1665
1666    /* resolve label address */
1667    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1668        return false;
1669    }
1670
1671    /* call load helper */
1672    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
1673    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
1674    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
1675
1676    tcg_out_goto(s, l->raddr);
1677    return true;
1678}
1679
1680static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1681{
1682    MemOp opc = get_memop(l->oi);
1683
1684    /* resolve label address */
1685    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1686        return false;
1687    }
1688
1689    /* call store helper */
1690    tcg_out_st_helper_args(s, l, &ldst_helper_param);
1691    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
1692
1693    tcg_out_goto(s, l->raddr);
1694    return true;
1695}
1696
1697/* We expect to use a 12-bit negative offset from ENV.  */
1698#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)
1699
1700/*
1701 * For system-mode, perform the TLB load and compare.
1702 * For user-mode, perform any required alignment tests.
1703 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1704 * is required and fill in @h with the host address for the fast path.
1705 */
1706static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
1707                                           TCGReg addr_reg, MemOpIdx oi,
1708                                           bool is_ld)
1709{
1710    TCGType addr_type = s->addr_type;
1711    TCGLabelQemuLdst *ldst = NULL;
1712    MemOp opc = get_memop(oi);
1713    TCGAtomAlign aa;
1714    unsigned a_mask;
1715
1716    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
1717    a_mask = (1u << aa.align) - 1;
1718
1719    if (tcg_use_softmmu) {
1720        unsigned s_bits = opc & MO_SIZE;
1721        unsigned s_mask = (1u << s_bits) - 1;
1722        int mem_index = get_mmuidx(oi);
1723        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
1724        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
1725        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
1726        int compare_mask;
1727        TCGReg addr_adj;
1728
1729        ldst = new_ldst_label(s);
1730        ldst->is_ld = is_ld;
1731        ldst->oi = oi;
1732        ldst->addrlo_reg = addr_reg;
1733
1734        init_setting_vtype(s);
1735
1736        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
1737        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
1738
1739        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
1740                        s->page_bits - CPU_TLB_ENTRY_BITS);
1741        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
1742        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
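            /* TMP2 now points at the CPUTLBEntry for this address. */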
1743
1744        /*
1745         * For aligned accesses, we check the first byte and include the
1746         * alignment bits within the address.  For unaligned access, we
1747         * check that we don't cross pages using the address of the last
1748         * byte of the access.
1749         */
1750        addr_adj = addr_reg;
1751        if (a_mask < s_mask) {
1752            addr_adj = TCG_REG_TMP0;
1753            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
1754                            addr_adj, addr_reg, s_mask - a_mask);
1755        }
1756        compare_mask = s->page_mask | a_mask;
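            /*
             * ANDI accepts only a sign-extended 12-bit immediate; larger
             * compare masks must be materialized in a register.
             */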
1757        if (compare_mask == sextreg(compare_mask, 0, 12)) {
1758            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
1759        } else {
1760            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
1761            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
1762        }
1763
1764        /* Load the tlb comparator and the addend.  */
1765        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1766        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
1767                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1768                         : offsetof(CPUTLBEntry, addr_write));
1769        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
1770                   offsetof(CPUTLBEntry, addend));
1771
1772        /* Compare masked address with the TLB entry. */
1773        ldst->label_ptr[0] = s->code_ptr;
1774        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
1775
1776        /* TLB Hit - translate address using addend.  */
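            /* With Zba, ADD.UW zero-extends the 32-bit address while adding. */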
1777        if (addr_type != TCG_TYPE_I32) {
1778            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
1779        } else if (cpuinfo & CPUINFO_ZBA) {
1780            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
1781                            addr_reg, TCG_REG_TMP2);
1782        } else {
1783            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
1784            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
1785                            TCG_REG_TMP0, TCG_REG_TMP2);
1786        }
1787        *pbase = TCG_REG_TMP0;
1788    } else {
1789        TCGReg base;
1790
1791        if (a_mask) {
1792            ldst = new_ldst_label(s);
1793            ldst->is_ld = is_ld;
1794            ldst->oi = oi;
1795            ldst->addrlo_reg = addr_reg;
1796
1797            init_setting_vtype(s);
1798
1799            /* We expect alignment of at most 7, so we can always use andi. */
1800            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
1801            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
1802
1803            ldst->label_ptr[0] = s->code_ptr;
1804            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
1805        }
1806
1807        if (guest_base != 0) {
1808            base = TCG_REG_TMP0;
1809            if (addr_type != TCG_TYPE_I32) {
1810                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
1811                                TCG_GUEST_BASE_REG);
1812            } else if (cpuinfo & CPUINFO_ZBA) {
1813                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
1814                                TCG_GUEST_BASE_REG);
1815            } else {
1816                tcg_out_ext32u(s, base, addr_reg);
1817                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
1818            }
1819        } else if (addr_type != TCG_TYPE_I32) {
1820            base = addr_reg;
1821        } else {
1822            base = TCG_REG_TMP0;
1823            tcg_out_ext32u(s, base, addr_reg);
1824        }
1825        *pbase = base;
1826    }
1827
1828    return ldst;
1829}
1830
1831static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
1832                                   TCGReg base, MemOp opc, TCGType type)
1833{
1834    /* Byte swapping is left to middle-end expansion. */
1835    tcg_debug_assert((opc & MO_BSWAP) == 0);
1836
1837    switch (opc & (MO_SSIZE)) {
1838    case MO_UB:
1839        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
1840        break;
1841    case MO_SB:
1842        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
1843        break;
1844    case MO_UW:
1845        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
1846        break;
1847    case MO_SW:
1848        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
1849        break;
1850    case MO_UL:
1851        if (type == TCG_TYPE_I64) {
1852            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
1853            break;
1854        }
1855        /* FALLTHRU */
1856    case MO_SL:
1857        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
1858        break;
1859    case MO_UQ:
1860        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
1861        break;
1862    default:
1863        g_assert_not_reached();
1864    }
1865}
1866
1867static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1868                            MemOpIdx oi, TCGType data_type)
1869{
1870    TCGLabelQemuLdst *ldst;
1871    TCGReg base;
1872
1873    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
1874    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
1875
1876    if (ldst) {
1877        ldst->type = data_type;
1878        ldst->datalo_reg = data_reg;
1879        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1880    }
1881}
1882
1883static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
1884                                   TCGReg base, MemOp opc)
1885{
1886    /* Byte swapping is left to middle-end expansion. */
1887    tcg_debug_assert((opc & MO_BSWAP) == 0);
1888
1889    switch (opc & (MO_SSIZE)) {
1890    case MO_8:
1891        tcg_out_opc_store(s, OPC_SB, base, val, 0);
1892        break;
1893    case MO_16:
1894        tcg_out_opc_store(s, OPC_SH, base, val, 0);
1895        break;
1896    case MO_32:
1897        tcg_out_opc_store(s, OPC_SW, base, val, 0);
1898        break;
1899    case MO_64:
1900        tcg_out_opc_store(s, OPC_SD, base, val, 0);
1901        break;
1902    default:
1903        g_assert_not_reached();
1904    }
1905}
1906
1907static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1908                            MemOpIdx oi, TCGType data_type)
1909{
1910    TCGLabelQemuLdst *ldst;
1911    TCGReg base;
1912
1913    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
1914    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
1915
1916    if (ldst) {
1917        ldst->type = data_type;
1918        ldst->datalo_reg = data_reg;
1919        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1920    }
1921}
1922
1923static const tcg_insn_unit *tb_ret_addr;
1924
1925static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1926{
1927    /* Reuse the zeroing that exists for goto_ptr.  */
1928    if (a0 == 0) {
1929        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
1930    } else {
1931        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
1932        tcg_out_call_int(s, tb_ret_addr, true);
1933    }
1934}
1935
1936static void tcg_out_goto_tb(TCGContext *s, int which)
1937{
1938    /* Direct branch will be patched by tb_target_set_jmp_target. */
1939    set_jmp_insn_offset(s, which);
1940    tcg_out32(s, OPC_JAL);
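        /*
         * A bare OPC_JAL is "jal zero, 0".  tb_target_set_jmp_target
         * rewrites it into a real direct jump, or into a NOP so that
         * execution falls through to the indirect jump below.
         */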
1941
1942    /* When the branch is out of range, fall through to the indirect jump. */
1943    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
1944               get_jmp_target_addr(s, which));
1945    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
1946    set_jmp_reset_offset(s, which);
1947}
1948
1949void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1950                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1951{
1952    uintptr_t addr = tb->jmp_target_addr[n];
1953    ptrdiff_t offset = addr - jmp_rx;
1954    tcg_insn_unit insn;
1955
1956    /* Either branch directly, or NOP to fall through to the indirect branch. */
1957    if (offset == sextreg(offset, 0, 20)) {
1958        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
1959    } else {
1960        insn = OPC_NOP;
1961    }
1962    qatomic_set((uint32_t *)jmp_rw, insn);
1963    flush_idcache_range(jmp_rx, jmp_rw, 4);
1964}
1965
1966static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1967                       const TCGArg args[TCG_MAX_OP_ARGS],
1968                       const int const_args[TCG_MAX_OP_ARGS])
1969{
1970    TCGArg a0 = args[0];
1971    TCGArg a1 = args[1];
1972    TCGArg a2 = args[2];
1973    int c2 = const_args[2];
1974
1975    switch (opc) {
1976    case INDEX_op_goto_ptr:
1977        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
1978        break;
1979
1980    case INDEX_op_br:
1981        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
1982        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1983        break;
1984
1985    case INDEX_op_ld8u_i32:
1986    case INDEX_op_ld8u_i64:
1987        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
1988        break;
1989    case INDEX_op_ld8s_i32:
1990    case INDEX_op_ld8s_i64:
1991        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
1992        break;
1993    case INDEX_op_ld16u_i32:
1994    case INDEX_op_ld16u_i64:
1995        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
1996        break;
1997    case INDEX_op_ld16s_i32:
1998    case INDEX_op_ld16s_i64:
1999        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
2000        break;
2001    case INDEX_op_ld32u_i64:
2002        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
2003        break;
2004    case INDEX_op_ld_i32:
2005    case INDEX_op_ld32s_i64:
2006        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
2007        break;
2008    case INDEX_op_ld_i64:
2009        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
2010        break;
2011
2012    case INDEX_op_st8_i32:
2013    case INDEX_op_st8_i64:
2014        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
2015        break;
2016    case INDEX_op_st16_i32:
2017    case INDEX_op_st16_i64:
2018        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
2019        break;
2020    case INDEX_op_st_i32:
2021    case INDEX_op_st32_i64:
2022        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
2023        break;
2024    case INDEX_op_st_i64:
2025        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
2026        break;
2027
2028    case INDEX_op_add_i32:
2029        if (c2) {
2030            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
2031        } else {
2032            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
2033        }
2034        break;
2035    case INDEX_op_add_i64:
2036        if (c2) {
2037            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
2038        } else {
2039            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
2040        }
2041        break;
2042
2043    case INDEX_op_sub_i32:
2044        if (c2) {
2045            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
2046        } else {
2047            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
2048        }
2049        break;
2050    case INDEX_op_sub_i64:
2051        if (c2) {
2052            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
2053        } else {
2054            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
2055        }
2056        break;
2057
2058    case INDEX_op_and_i32:
2059    case INDEX_op_and_i64:
2060        if (c2) {
2061            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
2062        } else {
2063            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
2064        }
2065        break;
2066
2067    case INDEX_op_or_i32:
2068    case INDEX_op_or_i64:
2069        if (c2) {
2070            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
2071        } else {
2072            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
2073        }
2074        break;
2075
2076    case INDEX_op_xor_i32:
2077    case INDEX_op_xor_i64:
2078        if (c2) {
2079            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
2080        } else {
2081            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
2082        }
2083        break;
2084
2085    case INDEX_op_andc_i32:
2086    case INDEX_op_andc_i64:
2087        if (c2) {
2088            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2);
2089        } else {
2090            tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
2091        }
2092        break;
2093    case INDEX_op_orc_i32:
2094    case INDEX_op_orc_i64:
2095        if (c2) {
2096            tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2);
2097        } else {
2098            tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
2099        }
2100        break;
2101    case INDEX_op_eqv_i32:
2102    case INDEX_op_eqv_i64:
2103        if (c2) {
2104            tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2);
2105        } else {
2106            tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
2107        }
2108        break;
2109
2110    case INDEX_op_not_i32:
2111    case INDEX_op_not_i64:
2112        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
2113        break;
2114
2115    case INDEX_op_neg_i32:
2116        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
2117        break;
2118    case INDEX_op_neg_i64:
2119        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
2120        break;
2121
2122    case INDEX_op_mul_i32:
2123        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
2124        break;
2125    case INDEX_op_mul_i64:
2126        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
2127        break;
2128
2129    case INDEX_op_div_i32:
2130        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
2131        break;
2132    case INDEX_op_div_i64:
2133        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
2134        break;
2135
2136    case INDEX_op_divu_i32:
2137        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
2138        break;
2139    case INDEX_op_divu_i64:
2140        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
2141        break;
2142
2143    case INDEX_op_rem_i32:
2144        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
2145        break;
2146    case INDEX_op_rem_i64:
2147        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
2148        break;
2149
2150    case INDEX_op_remu_i32:
2151        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
2152        break;
2153    case INDEX_op_remu_i64:
2154        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
2155        break;
2156
2157    case INDEX_op_shl_i32:
2158        if (c2) {
2159            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
2160        } else {
2161            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
2162        }
2163        break;
2164    case INDEX_op_shl_i64:
2165        if (c2) {
2166            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
2167        } else {
2168            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
2169        }
2170        break;
2171
2172    case INDEX_op_shr_i32:
2173        if (c2) {
2174            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
2175        } else {
2176            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
2177        }
2178        break;
2179    case INDEX_op_shr_i64:
2180        if (c2) {
2181            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
2182        } else {
2183            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
2184        }
2185        break;
2186
2187    case INDEX_op_sar_i32:
2188        if (c2) {
2189            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
2190        } else {
2191            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
2192        }
2193        break;
2194    case INDEX_op_sar_i64:
2195        if (c2) {
2196            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
2197        } else {
2198            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
2199        }
2200        break;
2201
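        /* Zbb has rori but no roli: rotate left by C is rotate right by -C. */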
2202    case INDEX_op_rotl_i32:
2203        if (c2) {
2204            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f);
2205        } else {
2206            tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2);
2207        }
2208        break;
2209    case INDEX_op_rotl_i64:
2210        if (c2) {
2211            tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f);
2212        } else {
2213            tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2);
2214        }
2215        break;
2216
2217    case INDEX_op_rotr_i32:
2218        if (c2) {
2219            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f);
2220        } else {
2221            tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2);
2222        }
2223        break;
2224    case INDEX_op_rotr_i64:
2225        if (c2) {
2226            tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f);
2227        } else {
2228            tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2);
2229        }
2230        break;
2231
2232    case INDEX_op_bswap64_i64:
2233        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2234        break;
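        /*
         * REV8 reverses all eight bytes, leaving the swapped 16- or 32-bit
         * value in the high bits of the register; shift it back down with
         * SRLI (zero-extend) or SRAI (sign-extend) per the TCG_BSWAP_O* flags.
         */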
2235    case INDEX_op_bswap32_i32:
2236        a2 = 0;
2237        /* fall through */
2238    case INDEX_op_bswap32_i64:
2239        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2240        if (a2 & TCG_BSWAP_OZ) {
2241            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
2242        } else {
2243            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
2244        }
2245        break;
2246    case INDEX_op_bswap16_i64:
2247    case INDEX_op_bswap16_i32:
2248        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2249        if (a2 & TCG_BSWAP_OZ) {
2250            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
2251        } else {
2252            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
2253        }
2254        break;
2255
2256    case INDEX_op_ctpop_i32:
2257        tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
2258        break;
2259    case INDEX_op_ctpop_i64:
2260        tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
2261        break;
2262
2263    case INDEX_op_clz_i32:
2264        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2);
2265        break;
2266    case INDEX_op_clz_i64:
2267        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2);
2268        break;
2269    case INDEX_op_ctz_i32:
2270        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2);
2271        break;
2272    case INDEX_op_ctz_i64:
2273        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2);
2274        break;
2275
2276    case INDEX_op_add2_i32:
2277        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2278                        const_args[4], const_args[5], false, true);
2279        break;
2280    case INDEX_op_add2_i64:
2281        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2282                        const_args[4], const_args[5], false, false);
2283        break;
2284    case INDEX_op_sub2_i32:
2285        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2286                        const_args[4], const_args[5], true, true);
2287        break;
2288    case INDEX_op_sub2_i64:
2289        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2290                        const_args[4], const_args[5], true, false);
2291        break;
2292
2293    case INDEX_op_brcond_i32:
2294    case INDEX_op_brcond_i64:
2295        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
2296        break;
2297
2298    case INDEX_op_setcond_i32:
2299    case INDEX_op_setcond_i64:
2300        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
2301        break;
2302
2303    case INDEX_op_negsetcond_i32:
2304    case INDEX_op_negsetcond_i64:
2305        tcg_out_negsetcond(s, args[3], a0, a1, a2, c2);
2306        break;
2307
2308    case INDEX_op_movcond_i32:
2309    case INDEX_op_movcond_i64:
2310        tcg_out_movcond(s, args[5], a0, a1, a2, c2,
2311                        args[3], const_args[3], args[4], const_args[4]);
2312        break;
2313
2314    case INDEX_op_qemu_ld_a32_i32:
2315    case INDEX_op_qemu_ld_a64_i32:
2316        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
2317        break;
2318    case INDEX_op_qemu_ld_a32_i64:
2319    case INDEX_op_qemu_ld_a64_i64:
2320        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
2321        break;
2322    case INDEX_op_qemu_st_a32_i32:
2323    case INDEX_op_qemu_st_a64_i32:
2324        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
2325        break;
2326    case INDEX_op_qemu_st_a32_i64:
2327    case INDEX_op_qemu_st_a64_i64:
2328        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
2329        break;
2330
2331    case INDEX_op_extrh_i64_i32:
2332        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
2333        break;
2334
2335    case INDEX_op_mulsh_i32:
2336    case INDEX_op_mulsh_i64:
2337        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
2338        break;
2339
2340    case INDEX_op_muluh_i32:
2341    case INDEX_op_muluh_i64:
2342        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
2343        break;
2344
2345    case INDEX_op_mb:
2346        tcg_out_mb(s, a0);
2347        break;
2348
2349    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2350    case INDEX_op_mov_i64:
2351    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2352    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2353    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2354    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2355    case INDEX_op_ext8s_i64:
2356    case INDEX_op_ext8u_i32:
2357    case INDEX_op_ext8u_i64:
2358    case INDEX_op_ext16s_i32:
2359    case INDEX_op_ext16s_i64:
2360    case INDEX_op_ext16u_i32:
2361    case INDEX_op_ext16u_i64:
2362    case INDEX_op_ext32s_i64:
2363    case INDEX_op_ext32u_i64:
2364    case INDEX_op_ext_i32_i64:
2365    case INDEX_op_extu_i32_i64:
2366    case INDEX_op_extrl_i64_i32:
2367    default:
2368        g_assert_not_reached();
2369    }
2370}
2371
2372static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2373                           unsigned vecl, unsigned vece,
2374                           const TCGArg args[TCG_MAX_OP_ARGS],
2375                           const int const_args[TCG_MAX_OP_ARGS])
2376{
2377    TCGType type = vecl + TCG_TYPE_V64;
2378    TCGArg a0, a1, a2;
2379    int c2;
2380
2381    a0 = args[0];
2382    a1 = args[1];
2383    a2 = args[2];
2384    c2 = const_args[2];
2385
2386    switch (opc) {
2387    case INDEX_op_dupm_vec:
2388        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2389        break;
2390    case INDEX_op_ld_vec:
2391        tcg_out_ld(s, type, a0, a1, a2);
2392        break;
2393    case INDEX_op_st_vec:
2394        tcg_out_st(s, type, a0, a1, a2);
2395        break;
2396    case INDEX_op_add_vec:
2397        set_vtype_len_sew(s, type, vece);
2398        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
2399        break;
2400    case INDEX_op_sub_vec:
2401        set_vtype_len_sew(s, type, vece);
2402        if (const_args[1]) {
2403            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
2404        } else {
2405            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
2406        }
2407        break;
2408    case INDEX_op_and_vec:
2409        set_vtype_len(s, type);
2410        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
2411        break;
2412    case INDEX_op_or_vec:
2413        set_vtype_len(s, type);
2414        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
2415        break;
2416    case INDEX_op_xor_vec:
2417        set_vtype_len(s, type);
2418        tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
2419        break;
2420    case INDEX_op_not_vec:
2421        set_vtype_len(s, type);
2422        tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
2423        break;
2424    case INDEX_op_neg_vec:
2425        set_vtype_len_sew(s, type, vece);
2426        tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
2427        break;
2428    case INDEX_op_mul_vec:
2429        set_vtype_len_sew(s, type, vece);
2430        tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
2431        break;
2432    case INDEX_op_ssadd_vec:
2433        set_vtype_len_sew(s, type, vece);
2434        tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
2435        break;
2436    case INDEX_op_sssub_vec:
2437        set_vtype_len_sew(s, type, vece);
2438        tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
2439        break;
2440    case INDEX_op_usadd_vec:
2441        set_vtype_len_sew(s, type, vece);
2442        tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
2443        break;
2444    case INDEX_op_ussub_vec:
2445        set_vtype_len_sew(s, type, vece);
2446        tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
2447        break;
2448    case INDEX_op_smax_vec:
2449        set_vtype_len_sew(s, type, vece);
2450        tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
2451        break;
2452    case INDEX_op_smin_vec:
2453        set_vtype_len_sew(s, type, vece);
2454        tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
2455        break;
2456    case INDEX_op_umax_vec:
2457        set_vtype_len_sew(s, type, vece);
2458        tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
2459        break;
2460    case INDEX_op_umin_vec:
2461        set_vtype_len_sew(s, type, vece);
2462        tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
2463        break;
2464    case INDEX_op_shls_vec:
2465        set_vtype_len_sew(s, type, vece);
2466        tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
2467        break;
2468    case INDEX_op_shrs_vec:
2469        set_vtype_len_sew(s, type, vece);
2470        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
2471        break;
2472    case INDEX_op_sars_vec:
2473        set_vtype_len_sew(s, type, vece);
2474        tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
2475        break;
2476    case INDEX_op_shlv_vec:
2477        set_vtype_len_sew(s, type, vece);
2478        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2479        break;
2480    case INDEX_op_shrv_vec:
2481        set_vtype_len_sew(s, type, vece);
2482        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2483        break;
2484    case INDEX_op_sarv_vec:
2485        set_vtype_len_sew(s, type, vece);
2486        tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
2487        break;
2488    case INDEX_op_shli_vec:
2489        set_vtype_len_sew(s, type, vece);
2490        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
2491        break;
2492    case INDEX_op_shri_vec:
2493        set_vtype_len_sew(s, type, vece);
2494        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
2495        break;
2496    case INDEX_op_sari_vec:
2497        set_vtype_len_sew(s, type, vece);
2498        tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
2499        break;
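        /*
         * RVV has no rotate instructions; the rotates below are synthesized
         * from a pair of opposing shifts and an OR, with TCG_REG_V0 as
         * scratch.
         */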
2500    case INDEX_op_rotli_vec:
2501        set_vtype_len_sew(s, type, vece);
2502        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2503        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
2504                        -a2 & ((8 << vece) - 1));
2505        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2506        break;
2507    case INDEX_op_rotls_vec:
2508        set_vtype_len_sew(s, type, vece);
2509        tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2510        tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
2511        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
2512        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2513        break;
2514    case INDEX_op_rotlv_vec:
2515        set_vtype_len_sew(s, type, vece);
2516        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2517        tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2518        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2519        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2520        break;
2521    case INDEX_op_rotrv_vec:
2522        set_vtype_len_sew(s, type, vece);
2523        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2524        tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2525        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2526        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2527        break;
2528    case INDEX_op_cmp_vec:
2529        tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
2530                       -1, true, 0, true);
2531        break;
2532    case INDEX_op_cmpsel_vec:
2533        tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
2534                       args[3], const_args[3], args[4], const_args[4]);
2535        break;
2536    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov.  */
2537    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec.  */
2538    default:
2539        g_assert_not_reached();
2540    }
2541}
2542
2543void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2544                       TCGArg a0, ...)
2545{
2546    g_assert_not_reached();
2547}
2548
2549int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2550{
2551    switch (opc) {
2552    case INDEX_op_add_vec:
2553    case INDEX_op_sub_vec:
2554    case INDEX_op_and_vec:
2555    case INDEX_op_or_vec:
2556    case INDEX_op_xor_vec:
2557    case INDEX_op_not_vec:
2558    case INDEX_op_neg_vec:
2559    case INDEX_op_mul_vec:
2560    case INDEX_op_ssadd_vec:
2561    case INDEX_op_sssub_vec:
2562    case INDEX_op_usadd_vec:
2563    case INDEX_op_ussub_vec:
2564    case INDEX_op_smax_vec:
2565    case INDEX_op_smin_vec:
2566    case INDEX_op_umax_vec:
2567    case INDEX_op_umin_vec:
2568    case INDEX_op_shls_vec:
2569    case INDEX_op_shrs_vec:
2570    case INDEX_op_sars_vec:
2571    case INDEX_op_shlv_vec:
2572    case INDEX_op_shrv_vec:
2573    case INDEX_op_sarv_vec:
2574    case INDEX_op_shri_vec:
2575    case INDEX_op_shli_vec:
2576    case INDEX_op_sari_vec:
2577    case INDEX_op_rotls_vec:
2578    case INDEX_op_rotlv_vec:
2579    case INDEX_op_rotrv_vec:
2580    case INDEX_op_rotli_vec:
2581    case INDEX_op_cmp_vec:
2582    case INDEX_op_cmpsel_vec:
2583        return 1;
2584    default:
2585        return 0;
2586    }
2587}
2588
2589static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2590{
2591    switch (op) {
2592    case INDEX_op_goto_ptr:
2593        return C_O0_I1(r);
2594
2595    case INDEX_op_ld8u_i32:
2596    case INDEX_op_ld8s_i32:
2597    case INDEX_op_ld16u_i32:
2598    case INDEX_op_ld16s_i32:
2599    case INDEX_op_ld_i32:
2600    case INDEX_op_not_i32:
2601    case INDEX_op_neg_i32:
2602    case INDEX_op_ld8u_i64:
2603    case INDEX_op_ld8s_i64:
2604    case INDEX_op_ld16u_i64:
2605    case INDEX_op_ld16s_i64:
2606    case INDEX_op_ld32s_i64:
2607    case INDEX_op_ld32u_i64:
2608    case INDEX_op_ld_i64:
2609    case INDEX_op_not_i64:
2610    case INDEX_op_neg_i64:
2611    case INDEX_op_ext8u_i32:
2612    case INDEX_op_ext8u_i64:
2613    case INDEX_op_ext16u_i32:
2614    case INDEX_op_ext16u_i64:
2615    case INDEX_op_ext32u_i64:
2616    case INDEX_op_extu_i32_i64:
2617    case INDEX_op_ext8s_i32:
2618    case INDEX_op_ext8s_i64:
2619    case INDEX_op_ext16s_i32:
2620    case INDEX_op_ext16s_i64:
2621    case INDEX_op_ext32s_i64:
2622    case INDEX_op_extrl_i64_i32:
2623    case INDEX_op_extrh_i64_i32:
2624    case INDEX_op_ext_i32_i64:
2625    case INDEX_op_bswap16_i32:
2626    case INDEX_op_bswap32_i32:
2627    case INDEX_op_bswap16_i64:
2628    case INDEX_op_bswap32_i64:
2629    case INDEX_op_bswap64_i64:
2630    case INDEX_op_ctpop_i32:
2631    case INDEX_op_ctpop_i64:
2632        return C_O1_I1(r, r);
2633
2634    case INDEX_op_st8_i32:
2635    case INDEX_op_st16_i32:
2636    case INDEX_op_st_i32:
2637    case INDEX_op_st8_i64:
2638    case INDEX_op_st16_i64:
2639    case INDEX_op_st32_i64:
2640    case INDEX_op_st_i64:
2641        return C_O0_I2(rZ, r);
2642
2643    case INDEX_op_add_i32:
2644    case INDEX_op_and_i32:
2645    case INDEX_op_or_i32:
2646    case INDEX_op_xor_i32:
2647    case INDEX_op_add_i64:
2648    case INDEX_op_and_i64:
2649    case INDEX_op_or_i64:
2650    case INDEX_op_xor_i64:
2651    case INDEX_op_setcond_i32:
2652    case INDEX_op_setcond_i64:
2653    case INDEX_op_negsetcond_i32:
2654    case INDEX_op_negsetcond_i64:
2655        return C_O1_I2(r, r, rI);
2656
2657    case INDEX_op_andc_i32:
2658    case INDEX_op_andc_i64:
2659    case INDEX_op_orc_i32:
2660    case INDEX_op_orc_i64:
2661    case INDEX_op_eqv_i32:
2662    case INDEX_op_eqv_i64:
2663        return C_O1_I2(r, r, rJ);
2664
2665    case INDEX_op_sub_i32:
2666    case INDEX_op_sub_i64:
2667        return C_O1_I2(r, rZ, rN);
2668
2669    case INDEX_op_mul_i32:
2670    case INDEX_op_mulsh_i32:
2671    case INDEX_op_muluh_i32:
2672    case INDEX_op_div_i32:
2673    case INDEX_op_divu_i32:
2674    case INDEX_op_rem_i32:
2675    case INDEX_op_remu_i32:
2676    case INDEX_op_mul_i64:
2677    case INDEX_op_mulsh_i64:
2678    case INDEX_op_muluh_i64:
2679    case INDEX_op_div_i64:
2680    case INDEX_op_divu_i64:
2681    case INDEX_op_rem_i64:
2682    case INDEX_op_remu_i64:
2683        return C_O1_I2(r, rZ, rZ);
2684
2685    case INDEX_op_shl_i32:
2686    case INDEX_op_shr_i32:
2687    case INDEX_op_sar_i32:
2688    case INDEX_op_rotl_i32:
2689    case INDEX_op_rotr_i32:
2690    case INDEX_op_shl_i64:
2691    case INDEX_op_shr_i64:
2692    case INDEX_op_sar_i64:
2693    case INDEX_op_rotl_i64:
2694    case INDEX_op_rotr_i64:
2695        return C_O1_I2(r, r, ri);
2696
2697    case INDEX_op_clz_i32:
2698    case INDEX_op_clz_i64:
2699    case INDEX_op_ctz_i32:
2700    case INDEX_op_ctz_i64:
2701        return C_N1_I2(r, r, rM);
2702
2703    case INDEX_op_brcond_i32:
2704    case INDEX_op_brcond_i64:
2705        return C_O0_I2(rZ, rZ);
2706
2707    case INDEX_op_movcond_i32:
2708    case INDEX_op_movcond_i64:
2709        return C_O1_I4(r, r, rI, rM, rM);
2710
2711    case INDEX_op_add2_i32:
2712    case INDEX_op_add2_i64:
2713    case INDEX_op_sub2_i32:
2714    case INDEX_op_sub2_i64:
2715        return C_O2_I4(r, r, rZ, rZ, rM, rM);
2716
2717    case INDEX_op_qemu_ld_a32_i32:
2718    case INDEX_op_qemu_ld_a64_i32:
2719    case INDEX_op_qemu_ld_a32_i64:
2720    case INDEX_op_qemu_ld_a64_i64:
2721        return C_O1_I1(r, r);
2722    case INDEX_op_qemu_st_a32_i32:
2723    case INDEX_op_qemu_st_a64_i32:
2724    case INDEX_op_qemu_st_a32_i64:
2725    case INDEX_op_qemu_st_a64_i64:
2726        return C_O0_I2(rZ, r);
2727
2728    case INDEX_op_st_vec:
2729        return C_O0_I2(v, r);
2730    case INDEX_op_dup_vec:
2731    case INDEX_op_dupm_vec:
2732    case INDEX_op_ld_vec:
2733        return C_O1_I1(v, r);
2734    case INDEX_op_neg_vec:
2735    case INDEX_op_not_vec:
2736    case INDEX_op_shli_vec:
2737    case INDEX_op_shri_vec:
2738    case INDEX_op_sari_vec:
2739    case INDEX_op_rotli_vec:
2740        return C_O1_I1(v, v);
2741    case INDEX_op_add_vec:
2742    case INDEX_op_and_vec:
2743    case INDEX_op_or_vec:
2744    case INDEX_op_xor_vec:
2745    case INDEX_op_ssadd_vec:
2746    case INDEX_op_sssub_vec:
2747    case INDEX_op_usadd_vec:
2748    case INDEX_op_ussub_vec:
2749    case INDEX_op_smax_vec:
2750    case INDEX_op_smin_vec:
2751    case INDEX_op_umax_vec:
2752    case INDEX_op_umin_vec:
2753        return C_O1_I2(v, v, vK);
2754    case INDEX_op_sub_vec:
2755        return C_O1_I2(v, vK, v);
2756    case INDEX_op_mul_vec:
2757    case INDEX_op_shlv_vec:
2758    case INDEX_op_shrv_vec:
2759    case INDEX_op_sarv_vec:
2760    case INDEX_op_rotlv_vec:
2761    case INDEX_op_rotrv_vec:
2762        return C_O1_I2(v, v, v);
2763    case INDEX_op_shls_vec:
2764    case INDEX_op_shrs_vec:
2765    case INDEX_op_sars_vec:
2766    case INDEX_op_rotls_vec:
2767        return C_O1_I2(v, v, r);
2768    case INDEX_op_cmp_vec:
2769        return C_O1_I2(v, v, vL);
2770    case INDEX_op_cmpsel_vec:
2771        return C_O1_I4(v, v, vL, vK, vK);
2772    default:
2773        g_assert_not_reached();
2774    }
2775}
2776
2777static const int tcg_target_callee_save_regs[] = {
2778    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
2779    TCG_REG_S1,
2780    TCG_REG_S2,
2781    TCG_REG_S3,
2782    TCG_REG_S4,
2783    TCG_REG_S5,
2784    TCG_REG_S6,
2785    TCG_REG_S7,
2786    TCG_REG_S8,
2787    TCG_REG_S9,
2788    TCG_REG_S10,
2789    TCG_REG_S11,
2790    TCG_REG_RA,       /* should be last for ABI compliance */
2791};
2792
2793/* Stack frame parameters.  */
2794#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
2795#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
2796#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2797#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
2798                     + TCG_TARGET_STACK_ALIGN - 1) \
2799                    & -TCG_TARGET_STACK_ALIGN)
2800#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
2801
2802/* We expect FRAME_SIZE to fit in the 12-bit signed immediate of addi.  */
2803QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2804
2805/* Generate global QEMU prologue and epilogue code */
2806static void tcg_target_qemu_prologue(TCGContext *s)
2807{
2808    int i;
2809
2810    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2811
2812    /* TB prologue */
2813    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2814    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2815        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2816                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2817    }
2818
2819    if (!tcg_use_softmmu && guest_base) {
2820        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
2821        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2822    }
2823
2824    /* Call generated code */
2825    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2826    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
2827
2828    /* Return path for goto_ptr. Set return value to 0 */
2829    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2830    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
2831
2832    /* TB epilogue */
2833    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2834    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2835        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2836                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2837    }
2838
2839    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
2840    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
2841}
2842
2843static void tcg_out_tb_start(TCGContext *s)
2844{
2845    init_setting_vtype(s);
2846}
2847
2848static bool vtype_check(unsigned vtype)
2849{
2850    unsigned long tmp;
2851
2852    /* vsetvl tmp, zero, vtype */
2853    asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
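        /* An unsupported vtype sets vill and forces vl to zero, so tmp == 0. */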
2854    return tmp != 0;
2855}
2856
2857static void probe_frac_lmul_1(TCGType type, MemOp vsew)
2858{
2859    VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
2860    unsigned avl = tcg_type_size(type) >> vsew;
2861    int lmul = type - riscv_lg2_vlenb;    /* log2(type bytes) - log2(VLEN bytes) */
2862    unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
2863    bool lmul_eq_avl = true;
2864
2865    /* Guaranteed by Zve64x. */
2866    assert(lmul < 3);
2867
2868    /*
2869     * For LMUL < -3, the host vector size is so large that TYPE
2870     * is smaller than the minimum 1/8 fraction.
2871     *
2872     * For other fractional LMUL settings, implementations must
2873     * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
2874     * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
2875     * but e64 may not be supported. In other words, the hardware only
2876     * guarantees SEW_MIN <= SEW <= LMUL * ELEN.  Check.
2877     */
2878    if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
2879        vtype = encode_vtype(true, true, vsew, VLMUL_M1);
2880        lmul_eq_avl = false;
2881    }
2882
2883    if (avl < 32) {
2884        p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
2885    } else if (lmul_eq_avl) {
2886        /* rd != 0 and rs1 == 0 uses vlmax */
2887        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
2888    } else {
2889        p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
2890        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
2891    }
2892}
2893
2894static void probe_frac_lmul(void)
2895{
2896    /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
2897    QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
2898
2899    for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
2900        for (MemOp e = MO_8; e <= MO_64; e++) {
2901            probe_frac_lmul_1(t, e);
2902        }
2903    }
2904}
2905
2906static void tcg_target_init(TCGContext *s)
2907{
2908    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
2909    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
2910
2911    tcg_target_call_clobber_regs = -1;
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
2915    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
2916    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
2917    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
2918    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
2919    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
2920    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
2921    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
2922    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
2923    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
2924
2925    s->reserved_regs = 0;
2926    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
2927    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
2928    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
2929    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
2930    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2931    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
2932    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
2933
2934    if (cpuinfo & CPUINFO_ZVE64X) {
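            /*
             * riscv_lg2_vlenb is log2 of VLEN in bytes, which matches the
             * TCG_TYPE_V* encoding: 3 means a 64-bit VLEN, 4 a 128-bit one.
             */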
2935        switch (riscv_lg2_vlenb) {
2936        case TCG_TYPE_V64:
2937            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2938            tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
2939            tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
2940            s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2941            break;
2942        case TCG_TYPE_V128:
2943            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2944            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2945            tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
2946            s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2947            break;
2948        default:
2949            /* Guaranteed by Zve64x. */
2950            tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
2951            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2952            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2953            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
2954            break;
2955        }
2956        tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
2957        probe_frac_lmul();
2958    }
2959}
2960
2961typedef struct {
2962    DebugFrameHeader h;
2963    uint8_t fde_def_cfa[4];
2964    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
2965} DebugFrame;
2966
2967#define ELF_HOST_MACHINE EM_RISCV
2968
2969static const DebugFrame debug_frame = {
2970    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
2971    .h.cie.id = -1,
2972    .h.cie.version = 1,
2973    .h.cie.code_align = 1,
2974    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
2975    .h.cie.return_column = TCG_REG_RA,
2976
2977    /* Total FDE size does not include the "len" member.  */
2978    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2979
2980    .fde_def_cfa = {
2981        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2982        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2983        (FRAME_SIZE >> 7)
2984    },
2985    .fde_reg_ofs = {
2986        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
2987        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
2988        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
2989        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
2990        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
2991        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
2992        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
2993        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
2994        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
2995        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
2996        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
2997        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
2998    }
2999};
3000
3001void tcg_register_jit(const void *buf, size_t buf_size)
3002{
3003    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3004}
3005