1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2018 SiFive, Inc 5 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> 6 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> 7 * Copyright (c) 2008 Fabrice Bellard 8 * 9 * Based on i386/tcg-target.c and mips/tcg-target.c 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a copy 12 * of this software and associated documentation files (the "Software"), to deal 13 * in the Software without restriction, including without limitation the rights 14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 * copies of the Software, and to permit persons to whom the Software is 16 * furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice shall be included in 19 * all copies or substantial portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 * THE SOFTWARE. 28 */ 29 30/* Used for function call generation. 
*/ 31#define TCG_REG_CALL_STACK TCG_REG_SP 32#define TCG_TARGET_STACK_ALIGN 16 33#define TCG_TARGET_CALL_STACK_OFFSET 0 34#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL 35#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL 36#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL 37#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL 38 39#ifdef CONFIG_DEBUG_TCG 40static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 41 "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", 42 "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", 43 "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", 44 "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6", 45 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 46 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 47 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 48 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", 49}; 50#endif 51 52static const int tcg_target_reg_alloc_order[] = { 53 /* Call saved registers */ 54 /* TCG_REG_S0 reserved for TCG_AREG0 */ 55 TCG_REG_S1, 56 TCG_REG_S2, 57 TCG_REG_S3, 58 TCG_REG_S4, 59 TCG_REG_S5, 60 TCG_REG_S6, 61 TCG_REG_S7, 62 TCG_REG_S8, 63 TCG_REG_S9, 64 TCG_REG_S10, 65 TCG_REG_S11, 66 67 /* Call clobbered registers */ 68 TCG_REG_T0, 69 TCG_REG_T1, 70 TCG_REG_T2, 71 TCG_REG_T3, 72 TCG_REG_T4, 73 TCG_REG_T5, 74 TCG_REG_T6, 75 76 /* Argument registers */ 77 TCG_REG_A0, 78 TCG_REG_A1, 79 TCG_REG_A2, 80 TCG_REG_A3, 81 TCG_REG_A4, 82 TCG_REG_A5, 83 TCG_REG_A6, 84 TCG_REG_A7, 85 86 /* Vector registers and TCG_REG_V0 reserved for mask. 
*/ 87 TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, TCG_REG_V4, 88 TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, TCG_REG_V8, 89 TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, TCG_REG_V12, 90 TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16, 91 TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20, 92 TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24, 93 TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28, 94 TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 95}; 96 97static const int tcg_target_call_iarg_regs[] = { 98 TCG_REG_A0, 99 TCG_REG_A1, 100 TCG_REG_A2, 101 TCG_REG_A3, 102 TCG_REG_A4, 103 TCG_REG_A5, 104 TCG_REG_A6, 105 TCG_REG_A7, 106}; 107 108static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 109{ 110 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 111 tcg_debug_assert(slot >= 0 && slot <= 1); 112 return TCG_REG_A0 + slot; 113} 114 115#define TCG_CT_CONST_ZERO 0x100 116#define TCG_CT_CONST_S12 0x200 117#define TCG_CT_CONST_N12 0x400 118#define TCG_CT_CONST_M12 0x800 119#define TCG_CT_CONST_J12 0x1000 120#define TCG_CT_CONST_S5 0x2000 121#define TCG_CT_CONST_CMP_VI 0x4000 122 123#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) 124#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) 125#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000 126#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000 127 128#define sextreg sextract64 129 130/* 131 * RISC-V Base ISA opcodes (IM) 132 */ 133 134#define V_OPIVV (0x0 << 12) 135#define V_OPFVV (0x1 << 12) 136#define V_OPMVV (0x2 << 12) 137#define V_OPIVI (0x3 << 12) 138#define V_OPIVX (0x4 << 12) 139#define V_OPFVF (0x5 << 12) 140#define V_OPMVX (0x6 << 12) 141#define V_OPCFG (0x7 << 12) 142 143/* NF <= 7 && NF >= 0 */ 144#define V_NF(x) (x << 29) 145#define V_UNIT_STRIDE (0x0 << 20) 146#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20) 147 148typedef enum { 149 VLMUL_M1 = 0, /* LMUL=1 */ 150 VLMUL_M2, /* LMUL=2 */ 151 VLMUL_M4, /* LMUL=4 */ 152 VLMUL_M8, /* LMUL=8 */ 153 VLMUL_RESERVED, 154 VLMUL_MF8, /* LMUL=1/8 */ 155 VLMUL_MF4, /* LMUL=1/4 */ 156 
VLMUL_MF2, /* LMUL=1/2 */ 157} RISCVVlmul; 158 159typedef enum { 160 OPC_ADD = 0x33, 161 OPC_ADDI = 0x13, 162 OPC_AND = 0x7033, 163 OPC_ANDI = 0x7013, 164 OPC_AUIPC = 0x17, 165 OPC_BEQ = 0x63, 166 OPC_BEXTI = 0x48005013, 167 OPC_BGE = 0x5063, 168 OPC_BGEU = 0x7063, 169 OPC_BLT = 0x4063, 170 OPC_BLTU = 0x6063, 171 OPC_BNE = 0x1063, 172 OPC_DIV = 0x2004033, 173 OPC_DIVU = 0x2005033, 174 OPC_JAL = 0x6f, 175 OPC_JALR = 0x67, 176 OPC_LB = 0x3, 177 OPC_LBU = 0x4003, 178 OPC_LD = 0x3003, 179 OPC_LH = 0x1003, 180 OPC_LHU = 0x5003, 181 OPC_LUI = 0x37, 182 OPC_LW = 0x2003, 183 OPC_LWU = 0x6003, 184 OPC_MUL = 0x2000033, 185 OPC_MULH = 0x2001033, 186 OPC_MULHSU = 0x2002033, 187 OPC_MULHU = 0x2003033, 188 OPC_OR = 0x6033, 189 OPC_ORI = 0x6013, 190 OPC_REM = 0x2006033, 191 OPC_REMU = 0x2007033, 192 OPC_SB = 0x23, 193 OPC_SD = 0x3023, 194 OPC_SH = 0x1023, 195 OPC_SLL = 0x1033, 196 OPC_SLLI = 0x1013, 197 OPC_SLT = 0x2033, 198 OPC_SLTI = 0x2013, 199 OPC_SLTIU = 0x3013, 200 OPC_SLTU = 0x3033, 201 OPC_SRA = 0x40005033, 202 OPC_SRAI = 0x40005013, 203 OPC_SRL = 0x5033, 204 OPC_SRLI = 0x5013, 205 OPC_SUB = 0x40000033, 206 OPC_SW = 0x2023, 207 OPC_XOR = 0x4033, 208 OPC_XORI = 0x4013, 209 210 OPC_ADDIW = 0x1b, 211 OPC_ADDW = 0x3b, 212 OPC_DIVUW = 0x200503b, 213 OPC_DIVW = 0x200403b, 214 OPC_MULW = 0x200003b, 215 OPC_REMUW = 0x200703b, 216 OPC_REMW = 0x200603b, 217 OPC_SLLIW = 0x101b, 218 OPC_SLLW = 0x103b, 219 OPC_SRAIW = 0x4000501b, 220 OPC_SRAW = 0x4000503b, 221 OPC_SRLIW = 0x501b, 222 OPC_SRLW = 0x503b, 223 OPC_SUBW = 0x4000003b, 224 225 OPC_FENCE = 0x0000000f, 226 OPC_NOP = OPC_ADDI, /* nop = addi r0,r0,0 */ 227 228 /* Zba: Bit manipulation extension, address generation */ 229 OPC_ADD_UW = 0x0800003b, 230 231 /* Zbb: Bit manipulation extension, basic bit manipulation */ 232 OPC_ANDN = 0x40007033, 233 OPC_CLZ = 0x60001013, 234 OPC_CLZW = 0x6000101b, 235 OPC_CPOP = 0x60201013, 236 OPC_CPOPW = 0x6020101b, 237 OPC_CTZ = 0x60101013, 238 OPC_CTZW = 0x6010101b, 239 OPC_ORN = 0x40006033, 240 
OPC_REV8 = 0x6b805013, 241 OPC_ROL = 0x60001033, 242 OPC_ROLW = 0x6000103b, 243 OPC_ROR = 0x60005033, 244 OPC_RORW = 0x6000503b, 245 OPC_RORI = 0x60005013, 246 OPC_RORIW = 0x6000501b, 247 OPC_SEXT_B = 0x60401013, 248 OPC_SEXT_H = 0x60501013, 249 OPC_XNOR = 0x40004033, 250 OPC_ZEXT_H = 0x0800403b, 251 252 /* Zicond: integer conditional operations */ 253 OPC_CZERO_EQZ = 0x0e005033, 254 OPC_CZERO_NEZ = 0x0e007033, 255 256 /* V: Vector extension 1.0 */ 257 OPC_VSETVLI = 0x57 | V_OPCFG, 258 OPC_VSETIVLI = 0xc0000057 | V_OPCFG, 259 OPC_VSETVL = 0x80000057 | V_OPCFG, 260 261 OPC_VLE8_V = 0x7 | V_UNIT_STRIDE, 262 OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE, 263 OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE, 264 OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE, 265 OPC_VSE8_V = 0x27 | V_UNIT_STRIDE, 266 OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE, 267 OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE, 268 OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE, 269 270 OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0), 271 OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1), 272 OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3), 273 OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7), 274 275 OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0), 276 OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1), 277 OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3), 278 OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7), 279 280 OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI, 281 OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV, 282 283 OPC_VADD_VV = 0x57 | V_OPIVV, 284 OPC_VADD_VI = 0x57 | V_OPIVI, 285 OPC_VSUB_VV = 0x8000057 | V_OPIVV, 286 OPC_VRSUB_VI = 0xc000057 | V_OPIVI, 287 OPC_VAND_VV = 0x24000057 | V_OPIVV, 288 OPC_VAND_VI = 0x24000057 | V_OPIVI, 289 OPC_VOR_VV = 0x28000057 | V_OPIVV, 290 OPC_VOR_VI = 0x28000057 | V_OPIVI, 291 OPC_VXOR_VV = 0x2c000057 | V_OPIVV, 292 OPC_VXOR_VI = 0x2c000057 | V_OPIVI, 293 294 OPC_VMUL_VV = 0x94000057 | V_OPMVV, 295 OPC_VSADD_VV = 0x84000057 | V_OPIVV, 296 
OPC_VSADD_VI = 0x84000057 | V_OPIVI, 297 OPC_VSSUB_VV = 0x8c000057 | V_OPIVV, 298 OPC_VSSUB_VI = 0x8c000057 | V_OPIVI, 299 OPC_VSADDU_VV = 0x80000057 | V_OPIVV, 300 OPC_VSADDU_VI = 0x80000057 | V_OPIVI, 301 OPC_VSSUBU_VV = 0x88000057 | V_OPIVV, 302 OPC_VSSUBU_VI = 0x88000057 | V_OPIVI, 303 304 OPC_VMAX_VV = 0x1c000057 | V_OPIVV, 305 OPC_VMAX_VI = 0x1c000057 | V_OPIVI, 306 OPC_VMAXU_VV = 0x18000057 | V_OPIVV, 307 OPC_VMAXU_VI = 0x18000057 | V_OPIVI, 308 OPC_VMIN_VV = 0x14000057 | V_OPIVV, 309 OPC_VMIN_VI = 0x14000057 | V_OPIVI, 310 OPC_VMINU_VV = 0x10000057 | V_OPIVV, 311 OPC_VMINU_VI = 0x10000057 | V_OPIVI, 312 313 OPC_VMSEQ_VV = 0x60000057 | V_OPIVV, 314 OPC_VMSEQ_VI = 0x60000057 | V_OPIVI, 315 OPC_VMSEQ_VX = 0x60000057 | V_OPIVX, 316 OPC_VMSNE_VV = 0x64000057 | V_OPIVV, 317 OPC_VMSNE_VI = 0x64000057 | V_OPIVI, 318 OPC_VMSNE_VX = 0x64000057 | V_OPIVX, 319 320 OPC_VMSLTU_VV = 0x68000057 | V_OPIVV, 321 OPC_VMSLTU_VX = 0x68000057 | V_OPIVX, 322 OPC_VMSLT_VV = 0x6c000057 | V_OPIVV, 323 OPC_VMSLT_VX = 0x6c000057 | V_OPIVX, 324 OPC_VMSLEU_VV = 0x70000057 | V_OPIVV, 325 OPC_VMSLEU_VX = 0x70000057 | V_OPIVX, 326 OPC_VMSLE_VV = 0x74000057 | V_OPIVV, 327 OPC_VMSLE_VX = 0x74000057 | V_OPIVX, 328 329 OPC_VMSLEU_VI = 0x70000057 | V_OPIVI, 330 OPC_VMSLE_VI = 0x74000057 | V_OPIVI, 331 OPC_VMSGTU_VI = 0x78000057 | V_OPIVI, 332 OPC_VMSGTU_VX = 0x78000057 | V_OPIVX, 333 OPC_VMSGT_VI = 0x7c000057 | V_OPIVI, 334 OPC_VMSGT_VX = 0x7c000057 | V_OPIVX, 335 336 OPC_VSLL_VV = 0x94000057 | V_OPIVV, 337 OPC_VSLL_VI = 0x94000057 | V_OPIVI, 338 OPC_VSLL_VX = 0x94000057 | V_OPIVX, 339 OPC_VSRL_VV = 0xa0000057 | V_OPIVV, 340 OPC_VSRL_VI = 0xa0000057 | V_OPIVI, 341 OPC_VSRL_VX = 0xa0000057 | V_OPIVX, 342 OPC_VSRA_VV = 0xa4000057 | V_OPIVV, 343 OPC_VSRA_VI = 0xa4000057 | V_OPIVI, 344 OPC_VSRA_VX = 0xa4000057 | V_OPIVX, 345 346 OPC_VMV_V_V = 0x5e000057 | V_OPIVV, 347 OPC_VMV_V_I = 0x5e000057 | V_OPIVI, 348 OPC_VMV_V_X = 0x5e000057 | V_OPIVX, 349 350 OPC_VMVNR_V = 0x9e000057 | V_OPIVI, 351} 
RISCVInsn; 352 353static const struct { 354 RISCVInsn op; 355 bool swap; 356} tcg_cmpcond_to_rvv_vv[] = { 357 [TCG_COND_EQ] = { OPC_VMSEQ_VV, false }, 358 [TCG_COND_NE] = { OPC_VMSNE_VV, false }, 359 [TCG_COND_LT] = { OPC_VMSLT_VV, false }, 360 [TCG_COND_GE] = { OPC_VMSLE_VV, true }, 361 [TCG_COND_GT] = { OPC_VMSLT_VV, true }, 362 [TCG_COND_LE] = { OPC_VMSLE_VV, false }, 363 [TCG_COND_LTU] = { OPC_VMSLTU_VV, false }, 364 [TCG_COND_GEU] = { OPC_VMSLEU_VV, true }, 365 [TCG_COND_GTU] = { OPC_VMSLTU_VV, true }, 366 [TCG_COND_LEU] = { OPC_VMSLEU_VV, false } 367}; 368 369static const struct { 370 RISCVInsn op; 371 int min; 372 int max; 373 bool adjust; 374} tcg_cmpcond_to_rvv_vi[] = { 375 [TCG_COND_EQ] = { OPC_VMSEQ_VI, -16, 15, false }, 376 [TCG_COND_NE] = { OPC_VMSNE_VI, -16, 15, false }, 377 [TCG_COND_GT] = { OPC_VMSGT_VI, -16, 15, false }, 378 [TCG_COND_LE] = { OPC_VMSLE_VI, -16, 15, false }, 379 [TCG_COND_LT] = { OPC_VMSLE_VI, -15, 16, true }, 380 [TCG_COND_GE] = { OPC_VMSGT_VI, -15, 16, true }, 381 [TCG_COND_LEU] = { OPC_VMSLEU_VI, 0, 15, false }, 382 [TCG_COND_GTU] = { OPC_VMSGTU_VI, 0, 15, false }, 383 [TCG_COND_LTU] = { OPC_VMSLEU_VI, 1, 16, true }, 384 [TCG_COND_GEU] = { OPC_VMSGTU_VI, 1, 16, true }, 385}; 386 387/* test if a constant matches the constraint */ 388static bool tcg_target_const_match(int64_t val, int ct, 389 TCGType type, TCGCond cond, int vece) 390{ 391 if (ct & TCG_CT_CONST) { 392 return 1; 393 } 394 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 395 return 1; 396 } 397 if (type >= TCG_TYPE_V64) { 398 /* Val is replicated by VECE; extract the highest element. */ 399 val >>= (-8 << vece) & 63; 400 } 401 /* 402 * Sign extended from 12 bits: [-0x800, 0x7ff]. 403 * Used for most arithmetic, as this is the isa field. 404 */ 405 if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) { 406 return 1; 407 } 408 /* 409 * Sign extended from 12 bits, negated: [-0x7ff, 0x800]. 410 * Used for subtraction, where a constant must be handled by ADDI. 
411 */ 412 if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) { 413 return 1; 414 } 415 /* 416 * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. 417 * Used by addsub2 and movcond, which may need the negative value, 418 * and requires the modified constant to be representable. 419 */ 420 if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { 421 return 1; 422 } 423 /* 424 * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff]. 425 * Used to map ANDN back to ANDI, etc. 426 */ 427 if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) { 428 return 1; 429 } 430 /* 431 * Sign extended from 5 bits: [-0x10, 0x0f]. 432 * Used for vector-immediate. 433 */ 434 if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) { 435 return 1; 436 } 437 /* 438 * Used for vector compare OPIVI instructions. 439 */ 440 if ((ct & TCG_CT_CONST_CMP_VI) && 441 val >= tcg_cmpcond_to_rvv_vi[cond].min && 442 val <= tcg_cmpcond_to_rvv_vi[cond].max) { 443 return true; 444 } 445 return 0; 446} 447 448/* 449 * RISC-V immediate and instruction encoders (excludes 16-bit RVC) 450 */ 451 452/* Type-R */ 453 454static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2) 455{ 456 return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20; 457} 458 459/* Type-I */ 460 461static int32_t encode_imm12(uint32_t imm) 462{ 463 return (imm & 0xfff) << 20; 464} 465 466static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm) 467{ 468 return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm); 469} 470 471/* Type-S */ 472 473static int32_t encode_simm12(uint32_t imm) 474{ 475 int32_t ret = 0; 476 477 ret |= (imm & 0xFE0) << 20; 478 ret |= (imm & 0x1F) << 7; 479 480 return ret; 481} 482 483static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) 484{ 485 return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm); 486} 487 488/* Type-SB */ 489 490static int32_t 
encode_sbimm12(uint32_t imm) 491{ 492 int32_t ret = 0; 493 494 ret |= (imm & 0x1000) << 19; 495 ret |= (imm & 0x7e0) << 20; 496 ret |= (imm & 0x1e) << 7; 497 ret |= (imm & 0x800) >> 4; 498 499 return ret; 500} 501 502static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) 503{ 504 return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm); 505} 506 507/* Type-U */ 508 509static int32_t encode_uimm20(uint32_t imm) 510{ 511 return imm & 0xfffff000; 512} 513 514static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm) 515{ 516 return opc | (rd & 0x1f) << 7 | encode_uimm20(imm); 517} 518 519/* Type-UJ */ 520 521static int32_t encode_ujimm20(uint32_t imm) 522{ 523 int32_t ret = 0; 524 525 ret |= (imm & 0x0007fe) << (21 - 1); 526 ret |= (imm & 0x000800) << (20 - 11); 527 ret |= (imm & 0x0ff000) << (12 - 12); 528 ret |= (imm & 0x100000) << (31 - 20); 529 530 return ret; 531} 532 533static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm) 534{ 535 return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm); 536} 537 538 539/* Type-OPIVI */ 540 541static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm, 542 TCGReg vs2, bool vm) 543{ 544 return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 | 545 (vs2 & 0x1f) << 20 | (vm << 25); 546} 547 548/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */ 549 550static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1, 551 TCGReg s2, bool vm) 552{ 553 return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 | 554 (s2 & 0x1f) << 20 | (vm << 25); 555} 556 557/* Vector vtype */ 558 559static uint32_t encode_vtype(bool vta, bool vma, 560 MemOp vsew, RISCVVlmul vlmul) 561{ 562 return vma << 7 | vta << 6 | vsew << 3 | vlmul; 563} 564 565static int32_t encode_vset(RISCVInsn opc, TCGReg rd, 566 TCGArg rs1, uint32_t vtype) 567{ 568 return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20; 569} 570 571static int32_t encode_vseti(RISCVInsn opc, TCGReg rd, 572 uint32_t uimm, uint32_t 
vtype) 573{ 574 return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20; 575} 576 577/* 578 * RISC-V instruction emitters 579 */ 580 581static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc, 582 TCGReg rd, TCGReg rs1, TCGReg rs2) 583{ 584 tcg_out32(s, encode_r(opc, rd, rs1, rs2)); 585} 586 587static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc, 588 TCGReg rd, TCGReg rs1, TCGArg imm) 589{ 590 tcg_out32(s, encode_i(opc, rd, rs1, imm)); 591} 592 593static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc, 594 TCGReg rs1, TCGReg rs2, uint32_t imm) 595{ 596 tcg_out32(s, encode_s(opc, rs1, rs2, imm)); 597} 598 599static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc, 600 TCGReg rs1, TCGReg rs2, uint32_t imm) 601{ 602 tcg_out32(s, encode_sb(opc, rs1, rs2, imm)); 603} 604 605static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc, 606 TCGReg rd, uint32_t imm) 607{ 608 tcg_out32(s, encode_u(opc, rd, imm)); 609} 610 611static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc, 612 TCGReg rd, uint32_t imm) 613{ 614 tcg_out32(s, encode_uj(opc, rd, imm)); 615} 616 617static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 618{ 619 int i; 620 for (i = 0; i < count; ++i) { 621 p[i] = OPC_NOP; 622 } 623} 624 625/* 626 * Relocations 627 */ 628 629static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 630{ 631 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 632 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 633 634 tcg_debug_assert((offset & 1) == 0); 635 if (offset == sextreg(offset, 0, 12)) { 636 *src_rw |= encode_sbimm12(offset); 637 return true; 638 } 639 640 return false; 641} 642 643static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 644{ 645 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 646 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 647 648 tcg_debug_assert((offset & 1) == 0); 649 if (offset == sextreg(offset, 0, 20)) { 650 *src_rw |= 
encode_ujimm20(offset); 651 return true; 652 } 653 654 return false; 655} 656 657static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 658{ 659 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 660 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 661 int32_t lo = sextreg(offset, 0, 12); 662 int32_t hi = offset - lo; 663 664 if (offset == hi + lo) { 665 src_rw[0] |= encode_uimm20(hi); 666 src_rw[1] |= encode_imm12(lo); 667 return true; 668 } 669 670 return false; 671} 672 673static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 674 intptr_t value, intptr_t addend) 675{ 676 tcg_debug_assert(addend == 0); 677 switch (type) { 678 case R_RISCV_BRANCH: 679 return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value); 680 case R_RISCV_JAL: 681 return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); 682 case R_RISCV_CALL: 683 return reloc_call(code_ptr, (tcg_insn_unit *)value); 684 default: 685 g_assert_not_reached(); 686 } 687} 688 689/* 690 * RISC-V vector instruction emitters 691 */ 692 693/* 694 * Vector registers uses the same 5 lower bits as GPR registers, 695 * and vm=0 (vm = false) means vector masking ENABLED. 696 * With RVV 1.0, vs2 is the first operand, while rs1/imm is the 697 * second operand. 
698 */ 699static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc, 700 TCGReg vd, TCGReg vs2, TCGReg vs1) 701{ 702 tcg_out32(s, encode_v(opc, vd, vs1, vs2, true)); 703} 704 705static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc, 706 TCGReg vd, TCGReg vs2, TCGReg rs1) 707{ 708 tcg_out32(s, encode_v(opc, vd, rs1, vs2, true)); 709} 710 711static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc, 712 TCGReg vd, TCGReg vs2, int32_t imm) 713{ 714 tcg_out32(s, encode_vi(opc, vd, imm, vs2, true)); 715} 716 717static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi, 718 TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1) 719{ 720 if (c_vi1) { 721 tcg_out_opc_vi(s, o_vi, vd, vs2, vi1); 722 } else { 723 tcg_out_opc_vv(s, o_vv, vd, vs2, vi1); 724 } 725} 726 727static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd, 728 TCGReg vs2, int32_t imm) 729{ 730 tcg_out32(s, encode_vi(opc, vd, imm, vs2, false)); 731} 732 733static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd, 734 TCGReg vs2, TCGReg vs1) 735{ 736 tcg_out32(s, encode_v(opc, vd, vs1, vs2, false)); 737} 738 739typedef struct VsetCache { 740 uint32_t movi_insn; 741 uint32_t vset_insn; 742} VsetCache; 743 744static VsetCache riscv_vset_cache[3][4]; 745 746static void set_vtype(TCGContext *s, TCGType type, MemOp vsew) 747{ 748 const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew]; 749 750 s->riscv_cur_type = type; 751 s->riscv_cur_vsew = vsew; 752 753 if (p->movi_insn) { 754 tcg_out32(s, p->movi_insn); 755 } 756 tcg_out32(s, p->vset_insn); 757} 758 759static MemOp set_vtype_len(TCGContext *s, TCGType type) 760{ 761 if (type != s->riscv_cur_type) { 762 set_vtype(s, type, MO_64); 763 } 764 return s->riscv_cur_vsew; 765} 766 767static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew) 768{ 769 if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) { 770 set_vtype(s, type, vsew); 771 } 772} 773 774/* 775 * TCG intrinsics 776 */ 777 778static bool 
tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 779{ 780 if (ret == arg) { 781 return true; 782 } 783 switch (type) { 784 case TCG_TYPE_I32: 785 case TCG_TYPE_I64: 786 tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0); 787 break; 788 case TCG_TYPE_V64: 789 case TCG_TYPE_V128: 790 case TCG_TYPE_V256: 791 { 792 int lmul = type - riscv_lg2_vlenb; 793 int nf = 1 << MAX(lmul, 0); 794 tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1); 795 } 796 break; 797 default: 798 g_assert_not_reached(); 799 } 800 return true; 801} 802 803static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 804 tcg_target_long val) 805{ 806 tcg_target_long lo, hi, tmp; 807 int shift, ret; 808 809 if (type == TCG_TYPE_I32) { 810 val = (int32_t)val; 811 } 812 813 lo = sextreg(val, 0, 12); 814 if (val == lo) { 815 tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo); 816 return; 817 } 818 819 hi = val - lo; 820 if (val == (int32_t)val) { 821 tcg_out_opc_upper(s, OPC_LUI, rd, hi); 822 if (lo != 0) { 823 tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo); 824 } 825 return; 826 } 827 828 tmp = tcg_pcrel_diff(s, (void *)val); 829 if (tmp == (int32_t)tmp) { 830 tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); 831 tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0); 832 ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val); 833 tcg_debug_assert(ret == true); 834 return; 835 } 836 837 /* Look for a single 20-bit section. */ 838 shift = ctz64(val); 839 tmp = val >> shift; 840 if (tmp == sextreg(tmp, 0, 20)) { 841 tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12); 842 if (shift > 12) { 843 tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12); 844 } else { 845 tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift); 846 } 847 return; 848 } 849 850 /* Look for a few high zero bits, with lots of bits set in the middle. 
*/ 851 shift = clz64(val); 852 tmp = val << shift; 853 if (tmp == sextreg(tmp, 12, 20) << 12) { 854 tcg_out_opc_upper(s, OPC_LUI, rd, tmp); 855 tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); 856 return; 857 } else if (tmp == sextreg(tmp, 0, 12)) { 858 tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp); 859 tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); 860 return; 861 } 862 863 /* Drop into the constant pool. */ 864 new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0); 865 tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); 866 tcg_out_opc_imm(s, OPC_LD, rd, rd, 0); 867} 868 869static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 870{ 871 return false; 872} 873 874static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 875 tcg_target_long imm) 876{ 877 /* This function is only used for passing structs by reference. */ 878 g_assert_not_reached(); 879} 880 881static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) 882{ 883 tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff); 884} 885 886static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) 887{ 888 if (cpuinfo & CPUINFO_ZBB) { 889 tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO); 890 } else { 891 tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); 892 tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16); 893 } 894} 895 896static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) 897{ 898 if (cpuinfo & CPUINFO_ZBA) { 899 tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO); 900 } else { 901 tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32); 902 tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32); 903 } 904} 905 906static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 907{ 908 if (cpuinfo & CPUINFO_ZBB) { 909 tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0); 910 } else { 911 tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24); 912 tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24); 913 } 914} 915 916static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 917{ 918 if (cpuinfo 
& CPUINFO_ZBB) { 919 tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0); 920 } else { 921 tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); 922 tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16); 923 } 924} 925 926static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) 927{ 928 tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0); 929} 930 931static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 932{ 933 if (ret != arg) { 934 tcg_out_ext32s(s, ret, arg); 935 } 936} 937 938static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 939{ 940 tcg_out_ext32u(s, ret, arg); 941} 942 943static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) 944{ 945 tcg_out_ext32s(s, ret, arg); 946} 947 948static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data, 949 TCGReg addr, intptr_t offset) 950{ 951 intptr_t imm12 = sextreg(offset, 0, 12); 952 953 if (offset != imm12) { 954 intptr_t diff = tcg_pcrel_diff(s, (void *)offset); 955 956 if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { 957 imm12 = sextreg(diff, 0, 12); 958 tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12); 959 } else { 960 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); 961 if (addr != TCG_REG_ZERO) { 962 tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr); 963 } 964 } 965 addr = TCG_REG_TMP2; 966 } 967 968 switch (opc) { 969 case OPC_SB: 970 case OPC_SH: 971 case OPC_SW: 972 case OPC_SD: 973 tcg_out_opc_store(s, opc, addr, data, imm12); 974 break; 975 case OPC_LB: 976 case OPC_LBU: 977 case OPC_LH: 978 case OPC_LHU: 979 case OPC_LW: 980 case OPC_LWU: 981 case OPC_LD: 982 tcg_out_opc_imm(s, opc, data, addr, imm12); 983 break; 984 default: 985 g_assert_not_reached(); 986 } 987} 988 989static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data, 990 TCGReg addr, intptr_t offset) 991{ 992 tcg_debug_assert(data >= TCG_REG_V0); 993 tcg_debug_assert(addr < TCG_REG_V0); 994 995 if (offset) { 996 tcg_debug_assert(addr != TCG_REG_ZERO); 997 if 
(offset == sextreg(offset, 0, 12)) { 998 tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset); 999 } else { 1000 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset); 1001 tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr); 1002 } 1003 addr = TCG_REG_TMP0; 1004 } 1005 tcg_out32(s, encode_v(opc, data, addr, 0, true)); 1006} 1007 1008static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, 1009 TCGReg arg1, intptr_t arg2) 1010{ 1011 RISCVInsn insn; 1012 1013 switch (type) { 1014 case TCG_TYPE_I32: 1015 tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); 1016 break; 1017 case TCG_TYPE_I64: 1018 tcg_out_ldst(s, OPC_LD, arg, arg1, arg2); 1019 break; 1020 case TCG_TYPE_V64: 1021 case TCG_TYPE_V128: 1022 case TCG_TYPE_V256: 1023 if (type >= riscv_lg2_vlenb) { 1024 static const RISCVInsn whole_reg_ld[] = { 1025 OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V 1026 }; 1027 unsigned idx = type - riscv_lg2_vlenb; 1028 1029 tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld)); 1030 insn = whole_reg_ld[idx]; 1031 } else { 1032 static const RISCVInsn unit_stride_ld[] = { 1033 OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V 1034 }; 1035 MemOp prev_vsew = set_vtype_len(s, type); 1036 1037 tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld)); 1038 insn = unit_stride_ld[prev_vsew]; 1039 } 1040 tcg_out_vec_ldst(s, insn, arg, arg1, arg2); 1041 break; 1042 default: 1043 g_assert_not_reached(); 1044 } 1045} 1046 1047static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 1048 TCGReg arg1, intptr_t arg2) 1049{ 1050 RISCVInsn insn; 1051 1052 switch (type) { 1053 case TCG_TYPE_I32: 1054 tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); 1055 break; 1056 case TCG_TYPE_I64: 1057 tcg_out_ldst(s, OPC_SD, arg, arg1, arg2); 1058 break; 1059 case TCG_TYPE_V64: 1060 case TCG_TYPE_V128: 1061 case TCG_TYPE_V256: 1062 if (type >= riscv_lg2_vlenb) { 1063 static const RISCVInsn whole_reg_st[] = { 1064 OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V 1065 }; 1066 unsigned idx = type 
- riscv_lg2_vlenb;

            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
            insn = whole_reg_st[idx];
        } else {
            static const RISCVInsn unit_stride_st[] = {
                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
            insn = unit_stride_st[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Try to store the constant VAL to BASE + OFS.
 *
 * Only VAL == 0 is handled, by storing TCG_REG_ZERO.  Returns true if
 * the store was emitted; returns false, having emitted nothing, otherwise.
 */
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
        return true;
    }
    return false;
}

/*
 * Emit a double-word add, or subtract when IS_SUB:
 *     (RH:RL) = (AH:AL) +/- (BH:BL)
 *
 * CBL and CBH say that BL and BH respectively are constants rather than
 * registers.  IS32BIT selects the W forms of add/addi/sub.
 * TCG_REG_TMP0 receives the carry/borrow out of the low part and
 * TCG_REG_TMP1 may hold the intermediate high part.
 */
static void tcg_out_addsub2(TCGContext *s,
                            TCGReg rl, TCGReg rh,
                            TCGReg al, TCGReg ah,
                            TCGArg bl, TCGArg bh,
                            bool cbl, bool cbh, bool is_sub, bool is32bit)
{
    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
    TCGReg th = TCG_REG_TMP1;

    /* If we have a negative constant such that negating it would
       make the high part zero, we can (usually) eliminate one insn. */
    if (cbl && cbh && bh == -1 && bl != 0) {
        bl = -bl;
        bh = 0;
        is_sub = !is_sub;
    }

    /* By operating on the high part first, we get to use the final
       carry operation to move back from the temporary. */
    if (!cbh) {
        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
    } else if (bh != 0 || ah == rl) {
        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
    } else {
        /* Constant zero high part and no overlap with RL: use AH directly. */
        th = ah;
    }

    /* Note that tcg optimization should eliminate the bl == 0 case. */
    if (is_sub) {
        /* The borrow is computed before RL is written, from AL and BL. */
        if (cbl) {
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
        }
        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
    } else {
        if (cbl) {
            /* The carry is computed after the add, from the sum in RL. */
            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
        } else if (al == bl) {
            /*
             * If the input regs overlap, this is a simple doubling
             * and carry-out is the input msb.  This special case is
             * required when the output reg overlaps the input,
             * but we might as well use it always.
             */
            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
            tcg_out_opc_reg(s, opc_add, rl, al, al);
        } else {
            tcg_out_opc_reg(s, opc_add, rl, al, bl);
            /* Compare against whichever input was not overwritten by RL. */
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
                            rl, (rl == bl ? al : bl));
        }
        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
    }
}

/*
 * Duplicate the scalar register SRC into vector DST: configure vtype for
 * TYPE and VECE, then emit OPC_VMV_V_X.  Always returns true.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    set_vtype_len_sew(s, type, vece);
    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
    return true;
}

/*
 * Duplicate a value from memory into vector DST: load a TCG_TYPE_REG
 * value from BASE + OFFSET into TCG_REG_TMP0, then duplicate it with
 * tcg_out_dup_vec.  Returns the result of tcg_out_dup_vec.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset)
{
    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

/*
 * Duplicate the constant ARG into vector DST.
 *
 * Values in [-16, 15] are emitted directly with OPC_VMV_V_I; any other
 * value is materialized in TCG_REG_TMP0 and duplicated from there.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg)
{
    /* Arg is replicated by VECE; extract the highest element. */
    arg >>= (-8 << vece) & 63;

    if (arg >= -16 && arg < 16) {
        if (arg == 0 || arg == -1) {
            /* 0 and -1 skip the VECE-specific vtype setup. */
            set_vtype_len(s, type);
        } else {
            set_vtype_len_sew(s, type, vece);
        }
        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
        return;
    }
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

/*
 * Map a TCGCond to a branch opcode, plus whether the two comparison
 * operands must be swapped to use it.  Conditions without an explicit
 * entry are zero-initialized, i.e. have op == 0.
 */
static const struct {
    RISCVInsn op;
    bool swap;
} tcg_brcond_to_riscv[] = {
    [TCG_COND_EQ] =  { OPC_BEQ,  false },
    [TCG_COND_NE] =  { OPC_BNE,  false },
    [TCG_COND_LT] =  { OPC_BLT,  false },
    [TCG_COND_GE] =  { OPC_BGE,  false },
    [TCG_COND_LE] =  { OPC_BGE,  true  },
    [TCG_COND_GT] =  { OPC_BLT,  true  },
    [TCG_COND_LTU] = { OPC_BLTU, false },
    [TCG_COND_GEU] = { OPC_BGEU, false },
    [TCG_COND_LEU] = { OPC_BGEU, true  },
    [TCG_COND_GTU] = { OPC_BLTU, true  }
};

/*
 * Emit a conditional branch on COND(ARG1, ARG2) to label L.
 * The branch is emitted with a zero displacement and an R_RISCV_BRANCH
 * relocation against L is recorded at the branch instruction.
 */
static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                           TCGReg arg2, TCGLabel *l)
{
    RISCVInsn op = tcg_brcond_to_riscv[cond].op;

    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        TCGReg t = arg1;
        arg1 = arg2;
        arg2 = t;
    }

    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
    tcg_out_opc_branch(s, op, arg1, arg2, 0);
}

/*
 * Flags returned by tcg_out_setcond_int, or'ed above the register number:
 *   SETCOND_INV: the intermediate result must be inverted.
 *   SETCOND_NEZ: the intermediate result is zero/non-zero, not 0/1.
 */
#define SETCOND_INV    TCG_TARGET_NB_REGS
#define SETCOND_NEZ    (SETCOND_INV << 1)
#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)

/*
 * Emit the core of the comparison COND(ARG1, ARG2), where ARG2 is a
 * constant if C2 and a register otherwise.
 *
 * Returns the register holding the intermediate result, or'ed with
 * SETCOND_* flags telling the caller how to finish the computation.
 * The register is RET, except for NE against constant zero, where
 * nothing is emitted and ARG1 itself is returned.
 */
static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int flags = 0;

    /* Reduce to the conditions NE, LT, LTU, LE, LEU by inverting. */
    switch (cond) {
    case TCG_COND_EQ:    /* -> NE  */
    case TCG_COND_GE:    /* -> LT  */
    case TCG_COND_GEU:   /* -> LTU */
    case TCG_COND_GT:    /* -> LE  */
    case TCG_COND_GTU:   /* -> LEU */
        cond = tcg_invert_cond(cond);
        flags ^= SETCOND_INV;
        break;
    default:
        break;
    }

    /* Reduce LE and LEU to LT and LTU. */
    switch (cond) {
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /*
         * If we have a constant input, the most efficient way to implement
         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
         * We don't need to care for this for LE because the constant input
         * is constrained to signed 12-bit, and 0x800 is representable in the
         * temporary register.
         */
        if (c2) {
            if (cond == TCG_COND_LEU) {
                /* unsigned <= -1 is true */
                if (arg2 == -1) {
                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
                    return ret;
                }
                cond = TCG_COND_LTU;
            } else {
                cond = TCG_COND_LT;
            }
            tcg_debug_assert(arg2 <= 0x7ff);
            if (++arg2 == 0x800) {
                /* No longer fits the immediate form: compare via a register. */
                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
                arg2 = TCG_REG_TMP0;
                c2 = false;
            }
        } else {
            TCGReg tmp = arg2;
            arg2 = arg1;
            arg1 = tmp;
            cond = tcg_swap_cond(cond);    /* LE -> GE */
            cond = tcg_invert_cond(cond);  /* GE -> LT */
            flags ^= SETCOND_INV;
        }
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_NE:
        flags |= SETCOND_NEZ;
        if (!c2) {
            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
        } else if (arg2 == 0) {
            /* ARG1 is already the zero/non-zero value; emit nothing. */
            ret = arg1;
        } else {
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LT:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LTU:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
        }
        break;

    default:
        g_assert_not_reached();
    }

    return ret | flags;
}

/*
 * Emit COND(ARG1, ARG2) into RET as a boolean: run tcg_out_setcond_int
 * and then apply whatever fix-up its returned flags request.
 */
static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);

    /* Equal to RET means no flags and result already in RET: done. */
    if (tmpflags != ret) {
        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;

        switch (tmpflags & SETCOND_FLAGS) {
        case SETCOND_INV:
            /* Intermediate result is boolean: simply invert. */
            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
            break;
        case SETCOND_NEZ:
            /* Intermediate result is zero/non-zero: test != 0. */
            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
            break;
        case SETCOND_NEZ | SETCOND_INV:
            /* Intermediate result is zero/non-zero: test == 0. */
            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Emit COND(ARG1, ARG2) into RET as a 0/-1 mask rather than a 0/1 boolean.
 */
static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags;
    TCGReg tmp;

    /* For LT/GE comparison against 0, replicate the sign bit. */
    if (c2 && arg2 == 0) {
        switch (cond) {
        case TCG_COND_GE:
            /* Complement first, then take the sign of the complement. */
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
            arg1 = ret;
            /* fall through */
        case TCG_COND_LT:
            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
            return;
        default:
            break;
        }
    }

    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
    tmp = tmpflags & ~SETCOND_FLAGS;

    /* If intermediate result is zero/non-zero: test != 0. */
    if (tmpflags & SETCOND_NEZ) {
        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
        tmp = ret;
    }

    /* Produce the 0/-1 result. */
    if (tmpflags & SETCOND_INV) {
        /* Inverted boolean: 1 - 1 = 0, 0 - 1 = -1. */
        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
    } else {
        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
    }
}

/*
 * Conditional move built from OPC_CZERO_EQZ / OPC_CZERO_NEZ, keyed on
 * the register TEST_NE.  VAL1 and VAL2 are constants if C_VAL1 / C_VAL2
 * and registers otherwise.  Uses TCG_REG_TMP1, and in the general
 * register/register case also TCG_REG_TMP0, as scratch.
 *
 * NOTE(review): presumably RET = (TEST_NE != 0 ? VAL1 : VAL2), going by
 * the Zicond czero semantics -- confirm against the ISA manual.
 */
static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
                                   int val1, bool c_val1,
                                   int val2, bool c_val2)
{
    /*
     * The zero tests below do not consult C_VAL1 / C_VAL2.
     * NOTE(review): presumably register 0 is the hard-wired zero register,
     * so a zero register number behaves like constant zero -- confirm.
     */
    if (val1 == 0) {
        if (c_val2) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
            val2 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
        return;
    }

    if (val2 == 0) {
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
            val1 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
        return;
    }

    if (c_val2) {
        /* Select between (val1 - val2) and 0, then add val2 back. */
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
        } else {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
        return;
    }

    if (c_val1) {
        /* Select between (val2 - val1) and 0, then add val1 back. */
        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
        return;
    }

    /* Both in registers: zero one of the two and OR the halves together. */
    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
}

/*
 * Emit a branch on COND(CMP1, CMP2) with a fixed displacement of 8,
 * followed by a single ADDI that writes VAL (a constant if C_VAL, else
 * a register) into RET.
 */
static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val, bool c_val)
{
    RISCVInsn op;
    int disp = 8;

    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
    op = tcg_brcond_to_riscv[cond].op;
    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
    } else {
        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
    }
    if (c_val) {
        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
    } else {
        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
    }
}

/*
 * Branch-based conditional move between VAL1 and VAL2, built on
 * tcg_out_movcond_br1.  When RET already matches one of the two values
 * a single br1 sequence suffices; otherwise VAL1 is moved first and
 * VAL2 is then conditionally written over it.
 */
static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val1, bool c_val1,
                                int val2, bool c_val2)
{
    TCGReg tmp;

    /* TCG optimizer reorders to prefer ret matching val2. */
    if (!c_val2 && ret == val2) {
        cond = tcg_invert_cond(cond);
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
        return;
    }

    if (!c_val1 && ret == val1) {
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
        return;
    }

    /* Do not overwrite a comparison input before the branch reads it. */
    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
    if (c_val1) {
        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
    }
    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
}

/*
 * Emit a conditional move into RET selecting between VAL1 and VAL2 on
 * COND(CMP1, CMP2).  Each of CMP2, VAL1 and VAL2 is a constant when its
 * c_* flag is set, and a register otherwise.
 *
 * Without CPUINFO_ZICOND, and when CMP2 is a register or zero, the
 * comparison is folded directly into the branch sequence.  Otherwise
 * the comparison is first reduced via tcg_out_setcond_int, and the
 * select is done with Zicond if available, else with a branch sequence.
 */
static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg cmp1, int cmp2, bool c_cmp2,
                            TCGReg val1, bool c_val1,
                            TCGReg val2, bool c_val2)
{
    int tmpflags;
    TCGReg t;

    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
                            val1, c_val1, val2, c_val2);
        return;
    }

    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
    t = tmpflags & ~SETCOND_FLAGS;

    if (cpuinfo & CPUINFO_ZICOND) {
        /* An inverted test is handled by exchanging the two values. */
        if (tmpflags & SETCOND_INV) {
            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
        } else {
            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
        }
    } else {
        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
                            val1, c_val1, val2, c_val2);
    }
}

/*
 * Emit a count-leading/trailing-zeros operation INSN of SRC1 into RET.
 * SRC2 (a constant if C_SRC2, else a register) is the value wanted when
 * SRC1 is zero; a conditional move is added unless SRC2 is the constant
 * operand width (32 for TCG_TYPE_I32, else 64).
 */
static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
{
    tcg_out_opc_imm(s, insn, ret, src1, 0);

    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
        /*
         * The requested zero result does not match the insn, so adjust.
         * Note that constraints put 'ret' in a new register, so the
         * computation above did not clobber either 'src1' or 'src2'.
         */
        tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true,
                        src2, c_src2, ret, false);
    }
}

/*
 * Emit a vector compare-and-select: compare CMP1 with CMP2 (a constant
 * if C_CMP2) into the TCG_REG_V0 mask, then merge VAL1 / VAL2 into RET
 * under that mask.  VAL1 / VAL2 are constants if C_VAL1 / C_VAL2.
 */
static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
                           TCGCond cond, TCGReg ret,
                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
                           TCGReg val1, bool c_val1,
                           TCGReg val2, bool c_val2)
{
    set_vtype_len_sew(s, type, vece);

    /* Use only vmerge_vim if possible, by inverting the test. */
    if (c_val2 && !c_val1) {
        TCGArg temp = val1;
        cond = tcg_invert_cond(cond);
        val1 = val2;
        val2 = temp;
        c_val1 = true;
        c_val2 = false;
    }

    /* Perform the comparison into V0 mask. */
    if (c_cmp2) {
        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp2, cmp1);
    } else {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp1, cmp2);
    }
    if (c_val1) {
        if (c_val2) {
            /* Both constant: splat VAL2 into RET and merge VAL1 over it. */
            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
            val2 = ret;
        }
        /* vd[i] == v0.mask[i] ? imm : vs2[i] */
        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
    } else {
        /* vd[i] == v0.mask[i] ? vs1[i] : vs2[i] */
        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
    }
}

/*
 * Emit a vector shift of SRC by the constant IMM into DST.  Counts below
 * 32 use the immediate form OPC_VI; larger counts are loaded into
 * TCG_REG_TMP0 and use the scalar-register form OPC_VX.
 */
static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
                            TCGReg dst, TCGReg src, unsigned imm)
{
    if (imm < 32) {
        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
    }
}

/*
 * Reset the tracked vector type to TCG_TYPE_COUNT.
 * NOTE(review): presumably this marks the current vtype as unknown so
 * the next vector op re-emits its vtype setup -- confirm against
 * set_vtype_len.
 */
static void init_setting_vtype(TCGContext *s)
{
    s->riscv_cur_type = TCG_TYPE_COUNT;
}

/*
 * Emit a call, or a tail call (jump without link) when TAIL, to ARG.
 * The shortest sequence that reaches the target is chosen: a single JAL,
 * AUIPC + JALR, or a full address load followed by JALR.
 * TCG_REG_TMP0 is used as scratch by the two longer forms.
 */
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
    int ret;

    init_setting_vtype(s);

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 20)) {
        /* short jump: -2097150 to 2097152 */
        tcg_out_opc_jump(s, OPC_JAL, link, offset);
    } else if (offset == (int32_t)offset) {
        /* long jump: -2147483646 to 2147483648 */
        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
        /* Patch the pair just emitted with the real displacement. */
        ret = reloc_call(s->code_ptr - 2, arg);
        tcg_debug_assert(ret == true);
    } else {
        /* far jump: 64-bit */
        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
        tcg_target_long base = (tcg_target_long)arg - imm;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
    }
}

/* Emit a (non-tail) call to ARG.  INFO is not used here. */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, arg, false);
}

/*
 * Emit a memory barrier.  Starting from OPC_FENCE, each ordering
 * requested in A0 (TCG_MO_*) ORs additional bits into the instruction
 * word.
 * NOTE(review): the constants look like the FENCE predecessor/successor
 * read/write bits -- confirm against the ISA manual.
 */
static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    tcg_insn_unit insn = OPC_FENCE;

    if (a0 & TCG_MO_LD_LD) {
        insn |= 0x02200000;
    }
    if (a0 & TCG_MO_ST_LD) {
        insn |= 0x01200000;
    }
    if (a0 & TCG_MO_LD_ST) {
        insn |= 0x02100000;
    }
    if (a0 & TCG_MO_ST_ST) {
        insn |= 0x01100000;
    }
    tcg_out32(s, insn);
}

/*
 * Load/store and TLB
 */

/*
 * Emit an unconditional JAL to TARGET, asserting (in debug builds) that
 * the displacement could be encoded by reloc_jimm20.
 */
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
    bool ok = reloc_jimm20(s->code_ptr - 1, target);
    tcg_debug_assert(ok);
}

/* This backend never reports byte-swapping memory accesses as supported. */
bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

/* We have three temps, we might as well expose them. */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
};

/*
 * Emit the out-of-line slow path for a guest load: patch the fast-path
 * branch to land here, call the load helper, move the result into place
 * and jump back to L->raddr.  Returns false if the fast-path branch
 * cannot reach this code.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOp opc = get_memop(l->oi);

    /* resolve label address */
    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* call load helper */
    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);

    tcg_out_goto(s, l->raddr);
    return true;
}

/*
 * Emit the out-of-line slow path for a guest store: patch the fast-path
 * branch to land here, call the store helper and jump back to L->raddr.
 * Returns false if the fast-path branch cannot reach this code.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOp opc = get_memop(l->oi);

    /* resolve label address */
    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* call store helper */
    tcg_out_st_helper_args(s, l, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);

    tcg_out_goto(s, l->raddr);
    return true;
}

/* We expect to use a 12-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @pbase with the register holding the host
 * address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    TCGAtomAlign aa;
    unsigned a_mask;

    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
    a_mask = (1u << aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_bits = opc & MO_SIZE;
        unsigned s_mask = (1u << s_bits) - 1;
        int mem_index = get_mmuidx(oi);
        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
        int compare_mask;
        TCGReg addr_adj;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        init_setting_vtype(s);

        /* TMP0 = TLB mask, TMP1 = TLB table base. */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

        /* TMP2 = address of the TLB entry for this guest address. */
        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
                        s->page_bits - CPU_TLB_ENTRY_BITS);
        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);

        /*
         * For aligned accesses, we check the first byte and include the
         * alignment bits within the address.  For unaligned access, we
         * check that we don't cross pages using the address of the last
         * byte of the access.
         */
        addr_adj = addr_reg;
        if (a_mask < s_mask) {
            addr_adj = TCG_REG_TMP0;
            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
                            addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = s->page_mask | a_mask;
        if (compare_mask == sextreg(compare_mask, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
        } else {
            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
        }

        /* Load the tlb comparator and the addend. */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
                   offsetof(CPUTLBEntry, addend));

        /* Compare masked address with the TLB entry. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);

        /* TLB Hit - translate address using addend. */
        if (addr_type != TCG_TYPE_I32) {
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
        } else if (cpuinfo & CPUINFO_ZBA) {
            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
                            addr_reg, TCG_REG_TMP2);
        } else {
            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
                            TCG_REG_TMP0, TCG_REG_TMP2);
        }
        *pbase = TCG_REG_TMP0;
    } else {
        TCGReg base;

        if (a_mask) {
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            init_setting_vtype(s);

            /* We are expecting alignment max 7, so we can always use andi. */
            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);

            /* Misaligned: branch to the slow path (target patched later). */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
        }

        if (guest_base != 0) {
            base = TCG_REG_TMP0;
            if (addr_type != TCG_TYPE_I32) {
                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
                                TCG_GUEST_BASE_REG);
            } else if (cpuinfo & CPUINFO_ZBA) {
                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
                                TCG_GUEST_BASE_REG);
            } else {
                tcg_out_ext32u(s, base, addr_reg);
                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
            }
        } else if (addr_type != TCG_TYPE_I32) {
            /* No guest base and a full-width address: use it unchanged. */
            base = addr_reg;
        } else {
            base = TCG_REG_TMP0;
            tcg_out_ext32u(s, base, addr_reg);
        }
        *pbase = base;
    }

    return ldst;
}

/*
 * Emit the load instruction for a guest load of kind OPC from host
 * address register BASE (offset 0) into VAL.  TYPE only matters for
 * MO_UL, where a TCG_TYPE_I64 destination uses LWU and anything else
 * uses LW.
 */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
                                   TCGReg base, MemOp opc, TCGType type)
{
    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & (MO_SSIZE)) {
    case MO_UB:
        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
        break;
    case MO_SB:
        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
        break;
    case MO_UW:
        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
        break;
    case MO_SW:
        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
        break;
    case MO_UL:
        if (type == TCG_TYPE_I64) {
            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
            break;
        }
        /* FALLTHRU */
    case MO_SL:
        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
        break;
    case MO_UQ:
        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Emit a guest load: compute the host address, emit the load itself,
 * and, if a slow path was created, record the data register, type and
 * return address in its label.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    TCGReg base;

    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

/*
 * Emit the store instruction for a guest store of kind OPC of VAL to
 * host address register BASE (offset 0).
 */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
                                   TCGReg base, MemOp opc)
{
    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & (MO_SSIZE)) {
    case MO_8:
        tcg_out_opc_store(s, OPC_SB, base, val, 0);
        break;
    case MO_16:
        tcg_out_opc_store(s, OPC_SH, base, val, 0);
        break;
    case MO_32:
        tcg_out_opc_store(s, OPC_SW, base, val, 0);
        break;
    case MO_64:
        tcg_out_opc_store(s, OPC_SD, base, val, 0);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Emit a guest store: compute the host address, emit the store itself,
 * and, if a slow path was created, record the data register, type and
 * return address in its label.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    TCGReg base;

    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

/* Jump target used by tcg_out_exit_tb when the return value is non-zero. */
static const tcg_insn_unit *tb_ret_addr;

/*
 * Emit the exit from a translation block with return value A0.
 * Both paths leave through a tail call.
 */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr. */
    if (a0 == 0) {
        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
        tcg_out_call_int(s, tb_ret_addr, true);
    }
}

/*
 * Emit the goto_tb sequence for jump slot WHICH: one patchable
 * instruction followed by an indirect jump through the slot's stored
 * target address.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, OPC_JAL);

    /* When branch is out of range, fall through to indirect. */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
               get_jmp_target_addr(s, which));
    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
    set_jmp_reset_offset(s, which);
}

/*
 * Patch the instruction emitted by tcg_out_goto_tb for jump slot N of
 * TB.  JMP_RX is the address the instruction executes at and is used to
 * compute the displacement; JMP_RW is the address written through.
 * The single 32-bit instruction is written with qatomic_set and the
 * instruction cache is then flushed for those 4 bytes.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t addr = tb->jmp_target_addr[n];
    ptrdiff_t offset = addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or fall through to indirect branch. */
    if (offset == sextreg(offset, 0, 20)) {
        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
    } else {
        insn = OPC_NOP;
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

/*
 * Emit host code for one integer TCG opcode OPC.
 *
 * ARGS holds the operands and CONST_ARGS[i] is non-zero when ARGS[i] is
 * a constant rather than a register.  A0/A1/A2/C2 below cache the first
 * three operands and the constness of the third.  Opcodes that are
 * always emitted by other routines trap in the final default case.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
        }
        break;

    /* Subtracting a constant is done by adding its negation. */
    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
        } else {
            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
        } else {
            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
        }
        break;

    /* With a constant operand, the complemented forms fold the NOT. */
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
        }
        break;
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
        }
        break;
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
        break;

    case INDEX_op_neg_i32:
        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
        break;
    case INDEX_op_neg_i64:
        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
        break;

    /* Constant shift and rotate counts are masked to the operand width. */
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
        }
        break;

    /* A constant rotate left is emitted as a rotate right by -count. */
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2);
        }
        break;
    case INDEX_op_rotl_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2);
        }
        break;

    /*
     * Byte swaps use REV8; the narrower forms then shift the swapped
     * bytes down from the top of the register, logically when
     * TCG_BSWAP_OZ is set in A2 and arithmetically otherwise.
     */
    case INDEX_op_bswap64_i64:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        break;
    case INDEX_op_bswap32_i32:
        /* Clear the flags so the i32 form always takes the SRAI path. */
        a2 = 0;
        /* fall through */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        if (a2 & TCG_BSWAP_OZ) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
        }
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        if (a2 & TCG_BSWAP_OZ) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
        }
        break;

    case INDEX_op_ctpop_i32:
        tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
        break;
    case INDEX_op_ctpop_i64:
        tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
        break;

    case INDEX_op_clz_i32:
        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2);
        break;
    case INDEX_op_clz_i64:
        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2);
        break;
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2);
        break;

    /* Operands are rl, rh, al, ah, bl, bh; only bl and bh may be constant. */
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], false, true);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], false, false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], true, true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], true, false);
        break;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        /* Here a2 is the condition and args[3] the label. */
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        tcg_out_negsetcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        tcg_out_movcond(s, args[5], a0, a1, a2, c2,
                        args[3], const_args[3], args[4], const_args[4]);
        break;

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
        break;

    case INDEX_op_extrh_i64_i32:
        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
        break;

    case INDEX_op_mulsh_i32:
    case INDEX_op_mulsh_i64:
        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
    case INDEX_op_muluh_i64:
        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    /*
     * For the extract ops, a2 is the bit position and args[3] the
     * field length.  Only the combinations handled below are expected;
     * anything else traps.
     */
    case INDEX_op_extract_i64:
        if (a2 + args[3] == 32) {
            /* Field ends at bit 31: a 32-bit logical shift suffices. */
            if (a2 == 0) {
                tcg_out_ext32u(s, a0, a1);
            } else {
                tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
            }
            break;
        }
        /* FALLTHRU */
    case INDEX_op_extract_i32:
        switch (args[3]) {
        case 1:
            tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2);
            break;
        case 16:
            tcg_debug_assert(a2 == 0);
            tcg_out_ext16u(s, a0, a1);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_sextract_i64:
        if (a2 + args[3] == 32) {
            /* Field ends at bit 31: a 32-bit arithmetic shift suffices. */
            if (a2 == 0) {
                tcg_out_ext32s(s, a0, a1);
            } else {
                tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
            }
            break;
        }
        /* FALLTHRU */
    case INDEX_op_sextract_i32:
        if (a2 == 0 && args[3] == 8) {
            tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
        } else if (a2 == 0 && args[3] == 16) {
            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
        } else {
            g_assert_not_reached();
        }
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op. */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0, a1, a2;
    int c2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    c2 = const_args[2];

    switch (opc) {
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_sub_vec:
        set_vtype_len_sew(s, type, vece);
        if (const_args[1]) {
            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
        } else {
            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
        }
        break;
    case INDEX_op_and_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_or_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_xor_vec:
        set_vtype_len(s, type);
tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2); 2461 break; 2462 case INDEX_op_not_vec: 2463 set_vtype_len(s, type); 2464 tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1); 2465 break; 2466 case INDEX_op_neg_vec: 2467 set_vtype_len_sew(s, type, vece); 2468 tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0); 2469 break; 2470 case INDEX_op_mul_vec: 2471 set_vtype_len_sew(s, type, vece); 2472 tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2); 2473 break; 2474 case INDEX_op_ssadd_vec: 2475 set_vtype_len_sew(s, type, vece); 2476 tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2); 2477 break; 2478 case INDEX_op_sssub_vec: 2479 set_vtype_len_sew(s, type, vece); 2480 tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2); 2481 break; 2482 case INDEX_op_usadd_vec: 2483 set_vtype_len_sew(s, type, vece); 2484 tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2); 2485 break; 2486 case INDEX_op_ussub_vec: 2487 set_vtype_len_sew(s, type, vece); 2488 tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2); 2489 break; 2490 case INDEX_op_smax_vec: 2491 set_vtype_len_sew(s, type, vece); 2492 tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2); 2493 break; 2494 case INDEX_op_smin_vec: 2495 set_vtype_len_sew(s, type, vece); 2496 tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2); 2497 break; 2498 case INDEX_op_umax_vec: 2499 set_vtype_len_sew(s, type, vece); 2500 tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2); 2501 break; 2502 case INDEX_op_umin_vec: 2503 set_vtype_len_sew(s, type, vece); 2504 tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2); 2505 break; 2506 case INDEX_op_shls_vec: 2507 set_vtype_len_sew(s, type, vece); 2508 tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2); 2509 break; 2510 case INDEX_op_shrs_vec: 2511 set_vtype_len_sew(s, type, vece); 2512 tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2); 2513 break; 2514 case INDEX_op_sars_vec: 2515 set_vtype_len_sew(s, type, vece); 2516 
tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2); 2517 break; 2518 case INDEX_op_shlv_vec: 2519 set_vtype_len_sew(s, type, vece); 2520 tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2); 2521 break; 2522 case INDEX_op_shrv_vec: 2523 set_vtype_len_sew(s, type, vece); 2524 tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2); 2525 break; 2526 case INDEX_op_sarv_vec: 2527 set_vtype_len_sew(s, type, vece); 2528 tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2); 2529 break; 2530 case INDEX_op_shli_vec: 2531 set_vtype_len_sew(s, type, vece); 2532 tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2); 2533 break; 2534 case INDEX_op_shri_vec: 2535 set_vtype_len_sew(s, type, vece); 2536 tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2); 2537 break; 2538 case INDEX_op_sari_vec: 2539 set_vtype_len_sew(s, type, vece); 2540 tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2); 2541 break; 2542 case INDEX_op_rotli_vec: 2543 set_vtype_len_sew(s, type, vece); 2544 tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2); 2545 tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, 2546 -a2 & ((8 << vece) - 1)); 2547 tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0); 2548 break; 2549 case INDEX_op_rotls_vec: 2550 set_vtype_len_sew(s, type, vece); 2551 tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2); 2552 tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2); 2553 tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0); 2554 tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0); 2555 break; 2556 case INDEX_op_rotlv_vec: 2557 set_vtype_len_sew(s, type, vece); 2558 tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0); 2559 tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0); 2560 tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2); 2561 tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0); 2562 break; 2563 case INDEX_op_rotrv_vec: 2564 set_vtype_len_sew(s, type, vece); 2565 tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0); 2566 tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0); 2567 
tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2); 2568 tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0); 2569 break; 2570 case INDEX_op_cmp_vec: 2571 tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2, 2572 -1, true, 0, true); 2573 break; 2574 case INDEX_op_cmpsel_vec: 2575 tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2, 2576 args[3], const_args[3], args[4], const_args[4]); 2577 break; 2578 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2579 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2580 default: 2581 g_assert_not_reached(); 2582 } 2583} 2584 2585void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2586 TCGArg a0, ...) 2587{ 2588 g_assert_not_reached(); 2589} 2590 2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2592{ 2593 switch (opc) { 2594 case INDEX_op_add_vec: 2595 case INDEX_op_sub_vec: 2596 case INDEX_op_and_vec: 2597 case INDEX_op_or_vec: 2598 case INDEX_op_xor_vec: 2599 case INDEX_op_not_vec: 2600 case INDEX_op_neg_vec: 2601 case INDEX_op_mul_vec: 2602 case INDEX_op_ssadd_vec: 2603 case INDEX_op_sssub_vec: 2604 case INDEX_op_usadd_vec: 2605 case INDEX_op_ussub_vec: 2606 case INDEX_op_smax_vec: 2607 case INDEX_op_smin_vec: 2608 case INDEX_op_umax_vec: 2609 case INDEX_op_umin_vec: 2610 case INDEX_op_shls_vec: 2611 case INDEX_op_shrs_vec: 2612 case INDEX_op_sars_vec: 2613 case INDEX_op_shlv_vec: 2614 case INDEX_op_shrv_vec: 2615 case INDEX_op_sarv_vec: 2616 case INDEX_op_shri_vec: 2617 case INDEX_op_shli_vec: 2618 case INDEX_op_sari_vec: 2619 case INDEX_op_rotls_vec: 2620 case INDEX_op_rotlv_vec: 2621 case INDEX_op_rotrv_vec: 2622 case INDEX_op_rotli_vec: 2623 case INDEX_op_cmp_vec: 2624 case INDEX_op_cmpsel_vec: 2625 return 1; 2626 default: 2627 return 0; 2628 } 2629} 2630 2631static TCGConstraintSetIndex 2632tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) 2633{ 2634 switch (op) { 2635 case INDEX_op_goto_ptr: 2636 return C_O0_I1(r); 2637 2638 case INDEX_op_ld8u_i32: 
2639 case INDEX_op_ld8s_i32: 2640 case INDEX_op_ld16u_i32: 2641 case INDEX_op_ld16s_i32: 2642 case INDEX_op_ld_i32: 2643 case INDEX_op_not_i32: 2644 case INDEX_op_neg_i32: 2645 case INDEX_op_ld8u_i64: 2646 case INDEX_op_ld8s_i64: 2647 case INDEX_op_ld16u_i64: 2648 case INDEX_op_ld16s_i64: 2649 case INDEX_op_ld32s_i64: 2650 case INDEX_op_ld32u_i64: 2651 case INDEX_op_ld_i64: 2652 case INDEX_op_not_i64: 2653 case INDEX_op_neg_i64: 2654 case INDEX_op_ext8u_i32: 2655 case INDEX_op_ext8u_i64: 2656 case INDEX_op_ext16u_i32: 2657 case INDEX_op_ext16u_i64: 2658 case INDEX_op_ext32u_i64: 2659 case INDEX_op_extu_i32_i64: 2660 case INDEX_op_ext8s_i32: 2661 case INDEX_op_ext8s_i64: 2662 case INDEX_op_ext16s_i32: 2663 case INDEX_op_ext16s_i64: 2664 case INDEX_op_ext32s_i64: 2665 case INDEX_op_extrl_i64_i32: 2666 case INDEX_op_extrh_i64_i32: 2667 case INDEX_op_ext_i32_i64: 2668 case INDEX_op_extract_i32: 2669 case INDEX_op_extract_i64: 2670 case INDEX_op_sextract_i32: 2671 case INDEX_op_sextract_i64: 2672 case INDEX_op_bswap16_i32: 2673 case INDEX_op_bswap32_i32: 2674 case INDEX_op_bswap16_i64: 2675 case INDEX_op_bswap32_i64: 2676 case INDEX_op_bswap64_i64: 2677 case INDEX_op_ctpop_i32: 2678 case INDEX_op_ctpop_i64: 2679 return C_O1_I1(r, r); 2680 2681 case INDEX_op_st8_i32: 2682 case INDEX_op_st16_i32: 2683 case INDEX_op_st_i32: 2684 case INDEX_op_st8_i64: 2685 case INDEX_op_st16_i64: 2686 case INDEX_op_st32_i64: 2687 case INDEX_op_st_i64: 2688 return C_O0_I2(rZ, r); 2689 2690 case INDEX_op_add_i32: 2691 case INDEX_op_and_i32: 2692 case INDEX_op_or_i32: 2693 case INDEX_op_xor_i32: 2694 case INDEX_op_add_i64: 2695 case INDEX_op_and_i64: 2696 case INDEX_op_or_i64: 2697 case INDEX_op_xor_i64: 2698 case INDEX_op_setcond_i32: 2699 case INDEX_op_setcond_i64: 2700 case INDEX_op_negsetcond_i32: 2701 case INDEX_op_negsetcond_i64: 2702 return C_O1_I2(r, r, rI); 2703 2704 case INDEX_op_andc_i32: 2705 case INDEX_op_andc_i64: 2706 case INDEX_op_orc_i32: 2707 case INDEX_op_orc_i64: 2708 case 
INDEX_op_eqv_i32: 2709 case INDEX_op_eqv_i64: 2710 return C_O1_I2(r, r, rJ); 2711 2712 case INDEX_op_sub_i32: 2713 case INDEX_op_sub_i64: 2714 return C_O1_I2(r, rZ, rN); 2715 2716 case INDEX_op_mul_i32: 2717 case INDEX_op_mulsh_i32: 2718 case INDEX_op_muluh_i32: 2719 case INDEX_op_div_i32: 2720 case INDEX_op_divu_i32: 2721 case INDEX_op_rem_i32: 2722 case INDEX_op_remu_i32: 2723 case INDEX_op_mul_i64: 2724 case INDEX_op_mulsh_i64: 2725 case INDEX_op_muluh_i64: 2726 case INDEX_op_div_i64: 2727 case INDEX_op_divu_i64: 2728 case INDEX_op_rem_i64: 2729 case INDEX_op_remu_i64: 2730 return C_O1_I2(r, rZ, rZ); 2731 2732 case INDEX_op_shl_i32: 2733 case INDEX_op_shr_i32: 2734 case INDEX_op_sar_i32: 2735 case INDEX_op_rotl_i32: 2736 case INDEX_op_rotr_i32: 2737 case INDEX_op_shl_i64: 2738 case INDEX_op_shr_i64: 2739 case INDEX_op_sar_i64: 2740 case INDEX_op_rotl_i64: 2741 case INDEX_op_rotr_i64: 2742 return C_O1_I2(r, r, ri); 2743 2744 case INDEX_op_clz_i32: 2745 case INDEX_op_clz_i64: 2746 case INDEX_op_ctz_i32: 2747 case INDEX_op_ctz_i64: 2748 return C_N1_I2(r, r, rM); 2749 2750 case INDEX_op_brcond_i32: 2751 case INDEX_op_brcond_i64: 2752 return C_O0_I2(rZ, rZ); 2753 2754 case INDEX_op_movcond_i32: 2755 case INDEX_op_movcond_i64: 2756 return C_O1_I4(r, r, rI, rM, rM); 2757 2758 case INDEX_op_add2_i32: 2759 case INDEX_op_add2_i64: 2760 case INDEX_op_sub2_i32: 2761 case INDEX_op_sub2_i64: 2762 return C_O2_I4(r, r, rZ, rZ, rM, rM); 2763 2764 case INDEX_op_qemu_ld_a32_i32: 2765 case INDEX_op_qemu_ld_a64_i32: 2766 case INDEX_op_qemu_ld_a32_i64: 2767 case INDEX_op_qemu_ld_a64_i64: 2768 return C_O1_I1(r, r); 2769 case INDEX_op_qemu_st_a32_i32: 2770 case INDEX_op_qemu_st_a64_i32: 2771 case INDEX_op_qemu_st_a32_i64: 2772 case INDEX_op_qemu_st_a64_i64: 2773 return C_O0_I2(rZ, r); 2774 2775 case INDEX_op_st_vec: 2776 return C_O0_I2(v, r); 2777 case INDEX_op_dup_vec: 2778 case INDEX_op_dupm_vec: 2779 case INDEX_op_ld_vec: 2780 return C_O1_I1(v, r); 2781 case INDEX_op_neg_vec: 2782 
case INDEX_op_not_vec: 2783 case INDEX_op_shli_vec: 2784 case INDEX_op_shri_vec: 2785 case INDEX_op_sari_vec: 2786 case INDEX_op_rotli_vec: 2787 return C_O1_I1(v, v); 2788 case INDEX_op_add_vec: 2789 case INDEX_op_and_vec: 2790 case INDEX_op_or_vec: 2791 case INDEX_op_xor_vec: 2792 case INDEX_op_ssadd_vec: 2793 case INDEX_op_sssub_vec: 2794 case INDEX_op_usadd_vec: 2795 case INDEX_op_ussub_vec: 2796 case INDEX_op_smax_vec: 2797 case INDEX_op_smin_vec: 2798 case INDEX_op_umax_vec: 2799 case INDEX_op_umin_vec: 2800 return C_O1_I2(v, v, vK); 2801 case INDEX_op_sub_vec: 2802 return C_O1_I2(v, vK, v); 2803 case INDEX_op_mul_vec: 2804 case INDEX_op_shlv_vec: 2805 case INDEX_op_shrv_vec: 2806 case INDEX_op_sarv_vec: 2807 case INDEX_op_rotlv_vec: 2808 case INDEX_op_rotrv_vec: 2809 return C_O1_I2(v, v, v); 2810 case INDEX_op_shls_vec: 2811 case INDEX_op_shrs_vec: 2812 case INDEX_op_sars_vec: 2813 case INDEX_op_rotls_vec: 2814 return C_O1_I2(v, v, r); 2815 case INDEX_op_cmp_vec: 2816 return C_O1_I2(v, v, vL); 2817 case INDEX_op_cmpsel_vec: 2818 return C_O1_I4(v, v, vL, vK, vK); 2819 default: 2820 return C_NotImplemented; 2821 } 2822} 2823 2824static const int tcg_target_callee_save_regs[] = { 2825 TCG_REG_S0, /* used for the global env (TCG_AREG0) */ 2826 TCG_REG_S1, 2827 TCG_REG_S2, 2828 TCG_REG_S3, 2829 TCG_REG_S4, 2830 TCG_REG_S5, 2831 TCG_REG_S6, 2832 TCG_REG_S7, 2833 TCG_REG_S8, 2834 TCG_REG_S9, 2835 TCG_REG_S10, 2836 TCG_REG_S11, 2837 TCG_REG_RA, /* should be last for ABI compliance */ 2838}; 2839 2840/* Stack frame parameters. 
*/ 2841#define REG_SIZE (TCG_TARGET_REG_BITS / 8) 2842#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) 2843#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2844#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ 2845 + TCG_TARGET_STACK_ALIGN - 1) \ 2846 & -TCG_TARGET_STACK_ALIGN) 2847#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) 2848 2849/* We're expecting to be able to use an immediate for frame allocation. */ 2850QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff); 2851 2852/* Generate global QEMU prologue and epilogue code */ 2853static void tcg_target_qemu_prologue(TCGContext *s) 2854{ 2855 int i; 2856 2857 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE); 2858 2859 /* TB prologue */ 2860 tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE); 2861 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 2862 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2863 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 2864 } 2865 2866 if (!tcg_use_softmmu && guest_base) { 2867 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); 2868 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2869 } 2870 2871 /* Call generated code */ 2872 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2873 tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0); 2874 2875 /* Return path for goto_ptr. 
Set return value to 0 */ 2876 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2877 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO); 2878 2879 /* TB epilogue */ 2880 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 2881 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 2882 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2883 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 2884 } 2885 2886 tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE); 2887 tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0); 2888} 2889 2890static void tcg_out_tb_start(TCGContext *s) 2891{ 2892 init_setting_vtype(s); 2893} 2894 2895static bool vtype_check(unsigned vtype) 2896{ 2897 unsigned long tmp; 2898 2899 /* vsetvl tmp, zero, vtype */ 2900 asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype)); 2901 return tmp != 0; 2902} 2903 2904static void probe_frac_lmul_1(TCGType type, MemOp vsew) 2905{ 2906 VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew]; 2907 unsigned avl = tcg_type_size(type) >> vsew; 2908 int lmul = type - riscv_lg2_vlenb; 2909 unsigned vtype = encode_vtype(true, true, vsew, lmul & 7); 2910 bool lmul_eq_avl = true; 2911 2912 /* Guaranteed by Zve64x. */ 2913 assert(lmul < 3); 2914 2915 /* 2916 * For LMUL < -3, the host vector size is so large that TYPE 2917 * is smaller than the minimum 1/8 fraction. 2918 * 2919 * For other fractional LMUL settings, implementations must 2920 * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive. 2921 * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32, 2922 * but e64 may not be supported. In other words, the hardware only 2923 * guarantees SEW_MIN <= SEW <= LMUL * ELEN. Check. 
2924 */ 2925 if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) { 2926 vtype = encode_vtype(true, true, vsew, VLMUL_M1); 2927 lmul_eq_avl = false; 2928 } 2929 2930 if (avl < 32) { 2931 p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype); 2932 } else if (lmul_eq_avl) { 2933 /* rd != 0 and rs1 == 0 uses vlmax */ 2934 p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype); 2935 } else { 2936 p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl); 2937 p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype); 2938 } 2939} 2940 2941static void probe_frac_lmul(void) 2942{ 2943 /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */ 2944 QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3); 2945 2946 for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) { 2947 for (MemOp e = MO_8; e <= MO_64; e++) { 2948 probe_frac_lmul_1(t, e); 2949 } 2950 } 2951} 2952 2953static void tcg_target_init(TCGContext *s) 2954{ 2955 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 2956 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 2957 2958 tcg_target_call_clobber_regs = -1; 2959 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0); 2960 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1); 2961 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2); 2962 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3); 2963 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4); 2964 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5); 2965 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6); 2966 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7); 2967 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8); 2968 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9); 2969 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10); 2970 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11); 2971 2972 s->reserved_regs = 0; 2973 
tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); 2974 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 2975 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 2976 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 2977 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2978 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); 2979 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP); 2980 2981 if (cpuinfo & CPUINFO_ZVE64X) { 2982 switch (riscv_lg2_vlenb) { 2983 case TCG_TYPE_V64: 2984 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS; 2985 tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS; 2986 tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS; 2987 s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS); 2988 break; 2989 case TCG_TYPE_V128: 2990 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS; 2991 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS; 2992 tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS; 2993 s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS); 2994 break; 2995 default: 2996 /* Guaranteed by Zve64x. 
*/ 2997 tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256); 2998 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS; 2999 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS; 3000 tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS; 3001 break; 3002 } 3003 tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0); 3004 probe_frac_lmul(); 3005 } 3006} 3007 3008typedef struct { 3009 DebugFrameHeader h; 3010 uint8_t fde_def_cfa[4]; 3011 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; 3012} DebugFrame; 3013 3014#define ELF_HOST_MACHINE EM_RISCV 3015 3016static const DebugFrame debug_frame = { 3017 .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */ 3018 .h.cie.id = -1, 3019 .h.cie.version = 1, 3020 .h.cie.code_align = 1, 3021 .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */ 3022 .h.cie.return_column = TCG_REG_RA, 3023 3024 /* Total FDE size does not include the "len" member. */ 3025 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3026 3027 .fde_def_cfa = { 3028 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3029 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3030 (FRAME_SIZE >> 7) 3031 }, 3032 .fde_reg_ofs = { 3033 0x80 + 9, 12, /* DW_CFA_offset, s1, -96 */ 3034 0x80 + 18, 11, /* DW_CFA_offset, s2, -88 */ 3035 0x80 + 19, 10, /* DW_CFA_offset, s3, -80 */ 3036 0x80 + 20, 9, /* DW_CFA_offset, s4, -72 */ 3037 0x80 + 21, 8, /* DW_CFA_offset, s5, -64 */ 3038 0x80 + 22, 7, /* DW_CFA_offset, s6, -56 */ 3039 0x80 + 23, 6, /* DW_CFA_offset, s7, -48 */ 3040 0x80 + 24, 5, /* DW_CFA_offset, s8, -40 */ 3041 0x80 + 25, 4, /* DW_CFA_offset, s9, -32 */ 3042 0x80 + 26, 3, /* DW_CFA_offset, s10, -24 */ 3043 0x80 + 27, 2, /* DW_CFA_offset, s11, -16 */ 3044 0x80 + 1 , 1, /* DW_CFA_offset, ra, -8 */ 3045 } 3046}; 3047 3048void tcg_register_jit(const void *buf, size_t buf_size) 3049{ 3050 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3051} 3052