1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name> 5 * 6 * Based on tcg/riscv/tcg-target.c.inc 7 * 8 * Copyright (c) 2018 SiFive, Inc 9 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> 10 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> 11 * Copyright (c) 2008 Fabrice Bellard 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this software and associated documentation files (the "Software"), to deal 15 * in the Software without restriction, including without limitation the rights 16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 * copies of the Software, and to permit persons to whom the Software is 18 * furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 * THE SOFTWARE. 30 */ 31 32#include "../tcg-ldst.c.inc" 33 34#ifdef CONFIG_DEBUG_TCG 35static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 36 "zero", 37 "ra", 38 "tp", 39 "sp", 40 "a0", 41 "a1", 42 "a2", 43 "a3", 44 "a4", 45 "a5", 46 "a6", 47 "a7", 48 "t0", 49 "t1", 50 "t2", 51 "t3", 52 "t4", 53 "t5", 54 "t6", 55 "t7", 56 "t8", 57 "r21", /* reserved in the LP64* ABI, hence no ABI name */ 58 "s9", 59 "s0", 60 "s1", 61 "s2", 62 "s3", 63 "s4", 64 "s5", 65 "s6", 66 "s7", 67 "s8" 68}; 69#endif 70 71static const int tcg_target_reg_alloc_order[] = { 72 /* Registers preserved across calls */ 73 /* TCG_REG_S0 reserved for TCG_AREG0 */ 74 TCG_REG_S1, 75 TCG_REG_S2, 76 TCG_REG_S3, 77 TCG_REG_S4, 78 TCG_REG_S5, 79 TCG_REG_S6, 80 TCG_REG_S7, 81 TCG_REG_S8, 82 TCG_REG_S9, 83 84 /* Registers (potentially) clobbered across calls */ 85 TCG_REG_T0, 86 TCG_REG_T1, 87 TCG_REG_T2, 88 TCG_REG_T3, 89 TCG_REG_T4, 90 TCG_REG_T5, 91 TCG_REG_T6, 92 TCG_REG_T7, 93 TCG_REG_T8, 94 95 /* Argument registers, opposite order of allocation. */ 96 TCG_REG_A7, 97 TCG_REG_A6, 98 TCG_REG_A5, 99 TCG_REG_A4, 100 TCG_REG_A3, 101 TCG_REG_A2, 102 TCG_REG_A1, 103 TCG_REG_A0, 104}; 105 106static const int tcg_target_call_iarg_regs[] = { 107 TCG_REG_A0, 108 TCG_REG_A1, 109 TCG_REG_A2, 110 TCG_REG_A3, 111 TCG_REG_A4, 112 TCG_REG_A5, 113 TCG_REG_A6, 114 TCG_REG_A7, 115}; 116 117static const int tcg_target_call_oarg_regs[] = { 118 TCG_REG_A0, 119 TCG_REG_A1, 120}; 121 122#ifndef CONFIG_SOFTMMU 123#define USE_GUEST_BASE (guest_base != 0) 124#define TCG_GUEST_BASE_REG TCG_REG_S1 125#endif 126 127#define TCG_CT_CONST_ZERO 0x100 128#define TCG_CT_CONST_S12 0x200 129#define TCG_CT_CONST_N12 0x400 130#define TCG_CT_CONST_U12 0x800 131#define TCG_CT_CONST_C12 0x1000 132#define TCG_CT_CONST_WSZ 0x2000 133 134#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) 135/* 136 * For softmmu, we need to avoid conflicts with the first 5 137 * argument registers to call the helper. Some of these are 138 * also used for the tlb lookup. 139 */ 140#ifdef CONFIG_SOFTMMU 141#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) 142#else 143#define SOFTMMU_RESERVE_REGS 0 144#endif 145 146 147static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) 148{ 149 return sextract64(val, pos, len); 150} 151 152/* test if a constant matches the constraint */ 153static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 154{ 155 if (ct & TCG_CT_CONST) { 156 return true; 157 } 158 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 159 return true; 160 } 161 if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { 162 return true; 163 } 164 if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { 165 return true; 166 } 167 if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { 168 return true; 169 } 170 if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { 171 return true; 172 } 173 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { 174 return true; 175 } 176 return false; 177} 178 179/* 180 * Relocations 181 */ 182 183/* 184 * Relocation records defined in LoongArch ELF psABI v1.00 is way too 185 * complicated; a whopping stack machine is needed to stuff the fields, at 186 * the very least one SOP_PUSH and one SOP_POP (of the correct format) are 187 * needed. 188 * 189 * Hence, define our own simpler relocation types. Numbers are chosen as to 190 * not collide with potential future additions to the true ELF relocation 191 * type enum. 192 */ 193 194/* Field Sk16, shifted right by 2; suitable for conditional jumps */ 195#define R_LOONGARCH_BR_SK16 256 196/* Field Sd10k16, shifted right by 2; suitable for B and BL */ 197#define R_LOONGARCH_BR_SD10K16 257 198 199static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 200{ 201 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 202 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 203 204 tcg_debug_assert((offset & 3) == 0); 205 offset >>= 2; 206 if (offset == sextreg(offset, 0, 16)) { 207 *src_rw = deposit64(*src_rw, 10, 16, offset); 208 return true; 209 } 210 211 return false; 212} 213 214static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, 215 const tcg_insn_unit *target) 216{ 217 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 218 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 219 220 tcg_debug_assert((offset & 3) == 0); 221 offset >>= 2; 222 if (offset == sextreg(offset, 0, 26)) { 223 *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ 224 *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ 225 return true; 226 } 227 228 return false; 229} 230 231static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 232 intptr_t value, intptr_t addend) 233{ 234 tcg_debug_assert(addend == 0); 235 switch (type) { 236 case R_LOONGARCH_BR_SK16: 237 return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); 238 case R_LOONGARCH_BR_SD10K16: 239 return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value); 240 default: 241 g_assert_not_reached(); 242 } 243} 244 245#include "tcg-insn-defs.c.inc" 246 247/* 248 * TCG intrinsics 249 */ 250 251static void tcg_out_mb(TCGContext *s, TCGArg a0) 252{ 253 /* Baseline LoongArch only has the full barrier, unfortunately. */ 254 tcg_out_opc_dbar(s, 0); 255} 256 257static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 258{ 259 if (ret == arg) { 260 return true; 261 } 262 switch (type) { 263 case TCG_TYPE_I32: 264 case TCG_TYPE_I64: 265 /* 266 * Conventional register-register move used in LoongArch is 267 * `or dst, src, zero`. 268 */ 269 tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); 270 break; 271 default: 272 g_assert_not_reached(); 273 } 274 return true; 275} 276 277static bool imm_part_needs_loading(bool high_bits_are_ones, 278 tcg_target_long part) 279{ 280 if (high_bits_are_ones) { 281 return part != -1; 282 } else { 283 return part != 0; 284 } 285} 286 287/* Loads a 32-bit immediate into rd, sign-extended. */ 288static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) 289{ 290 tcg_target_long lo = sextreg(val, 0, 12); 291 tcg_target_long hi12 = sextreg(val, 12, 20); 292 293 /* Single-instruction cases. */ 294 if (lo == val) { 295 /* val fits in simm12: addi.w rd, zero, val */ 296 tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); 297 return; 298 } 299 if (0x800 <= val && val <= 0xfff) { 300 /* val fits in uimm12: ori rd, zero, val */ 301 tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); 302 return; 303 } 304 305 /* High bits must be set; load with lu12i.w + optional ori. */ 306 tcg_out_opc_lu12i_w(s, rd, hi12); 307 if (lo != 0) { 308 tcg_out_opc_ori(s, rd, rd, lo & 0xfff); 309 } 310} 311 312static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 313 tcg_target_long val) 314{ 315 /* 316 * LoongArch conventionally loads 64-bit immediates in at most 4 steps, 317 * with dedicated instructions for filling the respective bitfields 318 * below: 319 * 320 * 6 5 4 3 321 * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 322 * +-----------------------+---------------------------------------+... 323 * | hi52 | hi32 | 324 * +-----------------------+---------------------------------------+... 325 * 3 2 1 326 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 327 * ...+-------------------------------------+-------------------------+ 328 * | hi12 | lo | 329 * ...+-------------------------------------+-------------------------+ 330 * 331 * Check if val belong to one of the several fast cases, before falling 332 * back to the slow path. 333 */ 334 335 intptr_t pc_offset; 336 tcg_target_long val_lo, val_hi, pc_hi, offset_hi; 337 tcg_target_long hi32, hi52; 338 bool rd_high_bits_are_ones; 339 340 /* Value fits in signed i32. */ 341 if (type == TCG_TYPE_I32 || val == (int32_t)val) { 342 tcg_out_movi_i32(s, rd, val); 343 return; 344 } 345 346 /* PC-relative cases. */ 347 pc_offset = tcg_pcrel_diff(s, (void *)val); 348 if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { 349 /* Single pcaddu2i. */ 350 tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); 351 return; 352 } 353 354 if (pc_offset == (int32_t)pc_offset) { 355 /* Offset within 32 bits; load with pcalau12i + ori. */ 356 val_lo = sextreg(val, 0, 12); 357 val_hi = val >> 12; 358 pc_hi = (val - pc_offset) >> 12; 359 offset_hi = val_hi - pc_hi; 360 361 tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); 362 tcg_out_opc_pcalau12i(s, rd, offset_hi); 363 if (val_lo != 0) { 364 tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); 365 } 366 return; 367 } 368 369 hi32 = sextreg(val, 32, 20); 370 hi52 = sextreg(val, 52, 12); 371 372 /* Single cu52i.d case. */ 373 if (ctz64(val) >= 52) { 374 tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); 375 return; 376 } 377 378 /* Slow path. Initialize the low 32 bits, then concat high bits. */ 379 tcg_out_movi_i32(s, rd, val); 380 rd_high_bits_are_ones = (int32_t)val < 0; 381 382 if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { 383 tcg_out_opc_cu32i_d(s, rd, hi32); 384 rd_high_bits_are_ones = hi32 < 0; 385 } 386 387 if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { 388 tcg_out_opc_cu52i_d(s, rd, rd, hi52); 389 } 390} 391 392static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) 393{ 394 tcg_out_opc_andi(s, ret, arg, 0xff); 395} 396 397static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) 398{ 399 tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); 400} 401 402static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) 403{ 404 tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); 405} 406 407static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) 408{ 409 tcg_out_opc_sext_b(s, ret, arg); 410} 411 412static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) 413{ 414 tcg_out_opc_sext_h(s, ret, arg); 415} 416 417static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) 418{ 419 tcg_out_opc_addi_w(s, ret, arg, 0); 420} 421 422static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, 423 TCGReg a0, TCGReg a1, TCGReg a2, 424 bool c2, bool is_32bit) 425{ 426 if (c2) { 427 /* 428 * Fast path: semantics already satisfied due to constraint and 429 * insn behavior, single instruction is enough. 430 */ 431 tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); 432 /* all clz/ctz insns belong to DJ-format */ 433 tcg_out32(s, encode_dj_insn(opc, a0, a1)); 434 return; 435 } 436 437 tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); 438 /* a0 = a1 ? REG_TMP0 : a2 */ 439 tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); 440 tcg_out_opc_masknez(s, a0, a2, a1); 441 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); 442} 443 444static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, 445 TCGReg arg1, TCGReg arg2, bool c2) 446{ 447 TCGReg tmp; 448 449 if (c2) { 450 tcg_debug_assert(arg2 == 0); 451 } 452 453 switch (cond) { 454 case TCG_COND_EQ: 455 if (c2) { 456 tmp = arg1; 457 } else { 458 tcg_out_opc_sub_d(s, ret, arg1, arg2); 459 tmp = ret; 460 } 461 tcg_out_opc_sltui(s, ret, tmp, 1); 462 break; 463 case TCG_COND_NE: 464 if (c2) { 465 tmp = arg1; 466 } else { 467 tcg_out_opc_sub_d(s, ret, arg1, arg2); 468 tmp = ret; 469 } 470 tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); 471 break; 472 case TCG_COND_LT: 473 tcg_out_opc_slt(s, ret, arg1, arg2); 474 break; 475 case TCG_COND_GE: 476 tcg_out_opc_slt(s, ret, arg1, arg2); 477 tcg_out_opc_xori(s, ret, ret, 1); 478 break; 479 case TCG_COND_LE: 480 tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false); 481 break; 482 case TCG_COND_GT: 483 tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false); 484 break; 485 case TCG_COND_LTU: 486 tcg_out_opc_sltu(s, ret, arg1, arg2); 487 break; 488 case TCG_COND_GEU: 489 tcg_out_opc_sltu(s, ret, arg1, arg2); 490 tcg_out_opc_xori(s, ret, ret, 1); 491 break; 492 case TCG_COND_LEU: 493 tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false); 494 break; 495 case TCG_COND_GTU: 496 tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false); 497 break; 498 default: 499 g_assert_not_reached(); 500 break; 501 } 502} 503 504/* 505 * Branch helpers 506 */ 507 508static const struct { 509 LoongArchInsn op; 510 bool swap; 511} tcg_brcond_to_loongarch[] = { 512 [TCG_COND_EQ] = { OPC_BEQ, false }, 513 [TCG_COND_NE] = { OPC_BNE, false }, 514 [TCG_COND_LT] = { OPC_BGT, true }, 515 [TCG_COND_GE] = { OPC_BLE, true }, 516 [TCG_COND_LE] = { OPC_BLE, false }, 517 [TCG_COND_GT] = { OPC_BGT, false }, 518 [TCG_COND_LTU] = { OPC_BGTU, true }, 519 [TCG_COND_GEU] = { OPC_BLEU, true }, 520 [TCG_COND_LEU] = { OPC_BLEU, false }, 521 [TCG_COND_GTU] = { OPC_BGTU, false } 522}; 523 524static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, 525 TCGReg arg2, TCGLabel *l) 526{ 527 LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; 528 529 tcg_debug_assert(op != 0); 530 531 if (tcg_brcond_to_loongarch[cond].swap) { 532 TCGReg t = arg1; 533 arg1 = arg2; 534 arg2 = t; 535 } 536 537 /* all conditional branch insns belong to DJSk16-format */ 538 tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0); 539 tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); 540} 541 542static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) 543{ 544 TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; 545 ptrdiff_t offset = tcg_pcrel_diff(s, arg); 546 547 tcg_debug_assert((offset & 3) == 0); 548 if (offset == sextreg(offset, 0, 28)) { 549 /* short jump: +/- 256MiB */ 550 if (tail) { 551 tcg_out_opc_b(s, offset >> 2); 552 } else { 553 tcg_out_opc_bl(s, offset >> 2); 554 } 555 } else if (offset == sextreg(offset, 0, 38)) { 556 /* long jump: +/- 256GiB */ 557 tcg_target_long lo = sextreg(offset, 0, 18); 558 tcg_target_long hi = offset - lo; 559 tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); 560 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 561 } else { 562 /* far jump: 64-bit */ 563 tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); 564 tcg_target_long hi = (tcg_target_long)arg - lo; 565 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); 566 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 567 } 568} 569 570static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) 571{ 572 tcg_out_call_int(s, arg, false); 573} 574 575/* 576 * Load/store helpers 577 */ 578 579static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, 580 TCGReg addr, intptr_t offset) 581{ 582 intptr_t imm12 = sextreg(offset, 0, 12); 583 584 if (offset != imm12) { 585 intptr_t diff = offset - (uintptr_t)s->code_ptr; 586 587 if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { 588 imm12 = sextreg(diff, 0, 12); 589 tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); 590 } else { 591 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); 592 if (addr != TCG_REG_ZERO) { 593 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); 594 } 595 } 596 addr = TCG_REG_TMP2; 597 } 598 599 switch (opc) { 600 case OPC_LD_B: 601 case OPC_LD_BU: 602 case OPC_LD_H: 603 case OPC_LD_HU: 604 case OPC_LD_W: 605 case OPC_LD_WU: 606 case OPC_LD_D: 607 case OPC_ST_B: 608 case OPC_ST_H: 609 case OPC_ST_W: 610 case OPC_ST_D: 611 tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); 612 break; 613 default: 614 g_assert_not_reached(); 615 } 616} 617 618static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, 619 TCGReg arg1, intptr_t arg2) 620{ 621 bool is_32bit = type == TCG_TYPE_I32; 622 tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); 623} 624 625static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 626 TCGReg arg1, intptr_t arg2) 627{ 628 bool is_32bit = type == TCG_TYPE_I32; 629 tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2); 630} 631 632static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 633 TCGReg base, intptr_t ofs) 634{ 635 if (val == 0) { 636 tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); 637 return true; 638 } 639 return false; 640} 641 642/* 643 * Load/store helpers for SoftMMU, and qemu_ld/st implementations 644 */ 645 646#if defined(CONFIG_SOFTMMU) 647/* 648 * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 649 * MemOpIdx oi, uintptr_t ra) 650 */ 651static void * const qemu_ld_helpers[4] = { 652 [MO_8] = helper_ret_ldub_mmu, 653 [MO_16] = helper_le_lduw_mmu, 654 [MO_32] = helper_le_ldul_mmu, 655 [MO_64] = helper_le_ldq_mmu, 656}; 657 658/* 659 * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 660 * uintxx_t val, MemOpIdx oi, 661 * uintptr_t ra) 662 */ 663static void * const qemu_st_helpers[4] = { 664 [MO_8] = helper_ret_stb_mmu, 665 [MO_16] = helper_le_stw_mmu, 666 [MO_32] = helper_le_stl_mmu, 667 [MO_64] = helper_le_stq_mmu, 668}; 669 670/* We expect to use a 12-bit negative offset from ENV. */ 671QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 672QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); 673 674static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 675{ 676 tcg_out_opc_b(s, 0); 677 return reloc_br_sd10k16(s->code_ptr - 1, target); 678} 679 680/* 681 * Emits common code for TLB addend lookup, that eventually loads the 682 * addend in TCG_REG_TMP2. 683 */ 684static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi, 685 tcg_insn_unit **label_ptr, bool is_load) 686{ 687 MemOp opc = get_memop(oi); 688 unsigned s_bits = opc & MO_SIZE; 689 unsigned a_bits = get_alignment_bits(opc); 690 tcg_target_long compare_mask; 691 int mem_index = get_mmuidx(oi); 692 int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); 693 int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); 694 int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); 695 696 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); 697 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); 698 699 tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl, 700 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 701 tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); 702 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); 703 704 /* Load the tlb comparator and the addend. */ 705 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, 706 is_load ? offsetof(CPUTLBEntry, addr_read) 707 : offsetof(CPUTLBEntry, addr_write)); 708 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, 709 offsetof(CPUTLBEntry, addend)); 710 711 /* We don't support unaligned accesses. */ 712 if (a_bits < s_bits) { 713 a_bits = s_bits; 714 } 715 /* Clear the non-page, non-alignment bits from the address. */ 716 compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); 717 tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); 718 tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl); 719 720 /* Compare masked address with the TLB entry. */ 721 label_ptr[0] = s->code_ptr; 722 tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); 723 724 /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */ 725} 726 727static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, 728 TCGType type, 729 TCGReg datalo, TCGReg addrlo, 730 void *raddr, tcg_insn_unit **label_ptr) 731{ 732 TCGLabelQemuLdst *label = new_ldst_label(s); 733 734 label->is_ld = is_ld; 735 label->oi = oi; 736 label->type = type; 737 label->datalo_reg = datalo; 738 label->datahi_reg = 0; /* unused */ 739 label->addrlo_reg = addrlo; 740 label->addrhi_reg = 0; /* unused */ 741 label->raddr = tcg_splitwx_to_rx(raddr); 742 label->label_ptr[0] = label_ptr[0]; 743} 744 745static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 746{ 747 MemOpIdx oi = l->oi; 748 MemOp opc = get_memop(oi); 749 MemOp size = opc & MO_SIZE; 750 TCGType type = l->type; 751 752 /* resolve label address */ 753 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 754 return false; 755 } 756 757 /* call load helper */ 758 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 759 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); 760 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); 761 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); 762 763 tcg_out_call(s, qemu_ld_helpers[size]); 764 765 switch (opc & MO_SSIZE) { 766 case MO_SB: 767 tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0); 768 break; 769 case MO_SW: 770 tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0); 771 break; 772 case MO_SL: 773 tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); 774 break; 775 case MO_UL: 776 if (type == TCG_TYPE_I32) { 777 /* MO_UL loads of i32 should be sign-extended too */ 778 tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); 779 break; 780 } 781 /* fallthrough */ 782 default: 783 tcg_out_mov(s, type, l->datalo_reg, TCG_REG_A0); 784 break; 785 } 786 787 return tcg_out_goto(s, l->raddr); 788} 789 790static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 791{ 792 MemOpIdx oi = l->oi; 793 MemOp opc = get_memop(oi); 794 MemOp size = opc & MO_SIZE; 795 796 /* resolve label address */ 797 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 798 return false; 799 } 800 801 /* call store helper */ 802 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 803 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); 804 switch (size) { 805 case MO_8: 806 tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg); 807 break; 808 case MO_16: 809 tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg); 810 break; 811 case MO_32: 812 tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg); 813 break; 814 case MO_64: 815 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, l->datalo_reg); 816 break; 817 default: 818 g_assert_not_reached(); 819 break; 820 } 821 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); 822 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); 823 824 tcg_out_call(s, qemu_st_helpers[size]); 825 826 return tcg_out_goto(s, l->raddr); 827} 828#else 829 830/* 831 * Alignment helpers for user-mode emulation 832 */ 833 834static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, 835 unsigned a_bits) 836{ 837 TCGLabelQemuLdst *l = new_ldst_label(s); 838 839 l->is_ld = is_ld; 840 l->addrlo_reg = addr_reg; 841 842 /* 843 * Without micro-architecture details, we don't know which of bstrpick or 844 * andi is faster, so use bstrpick as it's not constrained by imm field 845 * width. (Not to say alignments >= 2^12 are going to happen any time 846 * soon, though) 847 */ 848 tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); 849 850 l->label_ptr[0] = s->code_ptr; 851 tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); 852 853 l->raddr = tcg_splitwx_to_rx(s->code_ptr); 854} 855 856static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 857{ 858 /* resolve label address */ 859 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 860 return false; 861 } 862 863 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); 864 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 865 866 /* tail call, with the return address back inline. */ 867 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); 868 tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld 869 : helper_unaligned_st), true); 870 return true; 871} 872 873static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 874{ 875 return tcg_out_fail_alignment(s, l); 876} 877 878static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 879{ 880 return tcg_out_fail_alignment(s, l); 881} 882 883#endif /* CONFIG_SOFTMMU */ 884 885/* 886 * `ext32u` the address register into the temp register given, 887 * if target is 32-bit, no-op otherwise. 888 * 889 * Returns the address register ready for use with TLB addend. 890 */ 891static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s, 892 TCGReg addr, TCGReg tmp) 893{ 894 if (TARGET_LONG_BITS == 32) { 895 tcg_out_ext32u(s, tmp, addr); 896 return tmp; 897 } 898 return addr; 899} 900 901static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj, 902 TCGReg rk, MemOp opc, TCGType type) 903{ 904 /* Byte swapping is left to middle-end expansion. */ 905 tcg_debug_assert((opc & MO_BSWAP) == 0); 906 907 switch (opc & MO_SSIZE) { 908 case MO_UB: 909 tcg_out_opc_ldx_bu(s, rd, rj, rk); 910 break; 911 case MO_SB: 912 tcg_out_opc_ldx_b(s, rd, rj, rk); 913 break; 914 case MO_UW: 915 tcg_out_opc_ldx_hu(s, rd, rj, rk); 916 break; 917 case MO_SW: 918 tcg_out_opc_ldx_h(s, rd, rj, rk); 919 break; 920 case MO_UL: 921 if (type == TCG_TYPE_I64) { 922 tcg_out_opc_ldx_wu(s, rd, rj, rk); 923 break; 924 } 925 /* fallthrough */ 926 case MO_SL: 927 tcg_out_opc_ldx_w(s, rd, rj, rk); 928 break; 929 case MO_UQ: 930 tcg_out_opc_ldx_d(s, rd, rj, rk); 931 break; 932 default: 933 g_assert_not_reached(); 934 } 935} 936 937static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) 938{ 939 TCGReg addr_regl; 940 TCGReg data_regl; 941 MemOpIdx oi; 942 MemOp opc; 943#if defined(CONFIG_SOFTMMU) 944 tcg_insn_unit *label_ptr[1]; 945#else 946 unsigned a_bits; 947#endif 948 TCGReg base; 949 950 data_regl = *args++; 951 addr_regl = *args++; 952 oi = *args++; 953 opc = get_memop(oi); 954 955#if defined(CONFIG_SOFTMMU) 956 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1); 957 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 958 tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type); 959 add_qemu_ldst_label(s, 1, oi, type, 960 data_regl, addr_regl, 961 s->code_ptr, label_ptr); 962#else 963 a_bits = get_alignment_bits(opc); 964 if (a_bits) { 965 tcg_out_test_alignment(s, true, addr_regl, a_bits); 966 } 967 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 968 TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 969 tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type); 970#endif 971} 972 973static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data, 974 TCGReg rj, TCGReg rk, MemOp opc) 975{ 976 /* Byte swapping is left to middle-end expansion. */ 977 tcg_debug_assert((opc & MO_BSWAP) == 0); 978 979 switch (opc & MO_SIZE) { 980 case MO_8: 981 tcg_out_opc_stx_b(s, data, rj, rk); 982 break; 983 case MO_16: 984 tcg_out_opc_stx_h(s, data, rj, rk); 985 break; 986 case MO_32: 987 tcg_out_opc_stx_w(s, data, rj, rk); 988 break; 989 case MO_64: 990 tcg_out_opc_stx_d(s, data, rj, rk); 991 break; 992 default: 993 g_assert_not_reached(); 994 } 995} 996 997static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) 998{ 999 TCGReg addr_regl; 1000 TCGReg data_regl; 1001 MemOpIdx oi; 1002 MemOp opc; 1003#if defined(CONFIG_SOFTMMU) 1004 tcg_insn_unit *label_ptr[1]; 1005#else 1006 unsigned a_bits; 1007#endif 1008 TCGReg base; 1009 1010 data_regl = *args++; 1011 addr_regl = *args++; 1012 oi = *args++; 1013 opc = get_memop(oi); 1014 1015#if defined(CONFIG_SOFTMMU) 1016 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0); 1017 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 1018 tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc); 1019 add_qemu_ldst_label(s, 0, oi, 1020 0, /* type param is unused for stores */ 1021 data_regl, addr_regl, 1022 s->code_ptr, label_ptr); 1023#else 1024 a_bits = get_alignment_bits(opc); 1025 if (a_bits) { 1026 tcg_out_test_alignment(s, false, addr_regl, a_bits); 1027 } 1028 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 1029 TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 1030 tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc); 1031#endif 1032} 1033 1034/* 1035 * Entry-points 1036 */ 1037 1038static const tcg_insn_unit *tb_ret_addr; 1039 1040static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1041 const TCGArg args[TCG_MAX_OP_ARGS], 1042 const int const_args[TCG_MAX_OP_ARGS]) 1043{ 1044 TCGArg a0 = args[0]; 1045 TCGArg a1 = args[1]; 1046 TCGArg a2 = args[2]; 1047 int c2 = const_args[2]; 1048 1049 switch (opc) { 1050 case INDEX_op_exit_tb: 1051 /* Reuse the zeroing that exists for goto_ptr. */ 1052 if (a0 == 0) { 1053 tcg_out_call_int(s, tcg_code_gen_epilogue, true); 1054 } else { 1055 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); 1056 tcg_out_call_int(s, tb_ret_addr, true); 1057 } 1058 break; 1059 1060 case INDEX_op_goto_tb: 1061 assert(s->tb_jmp_insn_offset == 0); 1062 /* indirect jump method */ 1063 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, 1064 (uintptr_t)(s->tb_jmp_target_addr + a0)); 1065 tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); 1066 set_jmp_reset_offset(s, a0); 1067 break; 1068 1069 case INDEX_op_mb: 1070 tcg_out_mb(s, a0); 1071 break; 1072 1073 case INDEX_op_goto_ptr: 1074 tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); 1075 break; 1076 1077 case INDEX_op_br: 1078 tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), 1079 0); 1080 tcg_out_opc_b(s, 0); 1081 break; 1082 1083 case INDEX_op_brcond_i32: 1084 case INDEX_op_brcond_i64: 1085 tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); 1086 break; 1087 1088 case INDEX_op_ext8s_i32: 1089 case INDEX_op_ext8s_i64: 1090 tcg_out_ext8s(s, a0, a1); 1091 break; 1092 1093 case INDEX_op_ext8u_i32: 1094 case INDEX_op_ext8u_i64: 1095 tcg_out_ext8u(s, a0, a1); 1096 break; 1097 1098 case INDEX_op_ext16s_i32: 1099 case INDEX_op_ext16s_i64: 1100 tcg_out_ext16s(s, a0, a1); 1101 break; 1102 1103 case INDEX_op_ext16u_i32: 1104 case INDEX_op_ext16u_i64: 1105 tcg_out_ext16u(s, a0, a1); 1106 break; 1107 1108 case INDEX_op_ext32u_i64: 1109 case INDEX_op_extu_i32_i64: 1110 tcg_out_ext32u(s, a0, a1); 1111 break; 1112 1113 case INDEX_op_ext32s_i64: 1114 case INDEX_op_extrl_i64_i32: 1115 case INDEX_op_ext_i32_i64: 1116 tcg_out_ext32s(s, a0, a1); 1117 break; 1118 1119 case INDEX_op_extrh_i64_i32: 1120 tcg_out_opc_srai_d(s, a0, a1, 32); 1121 break; 1122 1123 case INDEX_op_not_i32: 1124 case INDEX_op_not_i64: 1125 tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); 1126 break; 1127 1128 case INDEX_op_nor_i32: 1129 case INDEX_op_nor_i64: 1130 if (c2) { 1131 tcg_out_opc_ori(s, a0, a1, a2); 1132 tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); 1133 } else { 1134 tcg_out_opc_nor(s, a0, a1, a2); 1135 } 1136 break; 1137 1138 case INDEX_op_andc_i32: 1139 case INDEX_op_andc_i64: 1140 if (c2) { 1141 /* guaranteed to fit due to constraint */ 1142 tcg_out_opc_andi(s, a0, a1, ~a2); 1143 } else { 1144 tcg_out_opc_andn(s, a0, a1, a2); 1145 } 1146 break; 1147 1148 case INDEX_op_orc_i32: 1149 case INDEX_op_orc_i64: 1150 if (c2) { 1151 /* guaranteed to fit due to constraint */ 1152 tcg_out_opc_ori(s, a0, a1, ~a2); 1153 } else { 1154 tcg_out_opc_orn(s, a0, a1, a2); 1155 } 1156 break; 1157 1158 case INDEX_op_and_i32: 1159 case INDEX_op_and_i64: 1160 if (c2) { 1161 tcg_out_opc_andi(s, a0, a1, a2); 1162 } else { 1163 tcg_out_opc_and(s, a0, a1, a2); 1164 } 1165 break; 1166 1167 case INDEX_op_or_i32: 1168 case INDEX_op_or_i64: 1169 if (c2) { 1170 tcg_out_opc_ori(s, a0, a1, a2); 1171 } else { 1172 tcg_out_opc_or(s, a0, a1, a2); 1173 } 1174 break; 1175 1176 case INDEX_op_xor_i32: 1177 case INDEX_op_xor_i64: 1178 if (c2) { 1179 tcg_out_opc_xori(s, a0, a1, a2); 1180 } else { 1181 tcg_out_opc_xor(s, a0, a1, a2); 1182 } 1183 break; 1184 1185 case INDEX_op_extract_i32: 1186 tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); 1187 break; 1188 case INDEX_op_extract_i64: 1189 tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); 1190 break; 1191 1192 case INDEX_op_deposit_i32: 1193 tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); 1194 break; 1195 case INDEX_op_deposit_i64: 1196 tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); 1197 break; 1198 1199 case INDEX_op_bswap16_i32: 1200 case INDEX_op_bswap16_i64: 1201 tcg_out_opc_revb_2h(s, a0, a1); 1202 if (a2 & TCG_BSWAP_OS) { 1203 tcg_out_ext16s(s, a0, a0); 1204 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1205 tcg_out_ext16u(s, a0, a0); 1206 } 1207 break; 1208 1209 case INDEX_op_bswap32_i32: 1210 /* All 32-bit values are computed sign-extended in the register. */ 1211 a2 = TCG_BSWAP_OS; 1212 /* fallthrough */ 1213 case INDEX_op_bswap32_i64: 1214 tcg_out_opc_revb_2w(s, a0, a1); 1215 if (a2 & TCG_BSWAP_OS) { 1216 tcg_out_ext32s(s, a0, a0); 1217 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1218 tcg_out_ext32u(s, a0, a0); 1219 } 1220 break; 1221 1222 case INDEX_op_bswap64_i64: 1223 tcg_out_opc_revb_d(s, a0, a1); 1224 break; 1225 1226 case INDEX_op_clz_i32: 1227 tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); 1228 break; 1229 case INDEX_op_clz_i64: 1230 tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); 1231 break; 1232 1233 case INDEX_op_ctz_i32: 1234 tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); 1235 break; 1236 case INDEX_op_ctz_i64: 1237 tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); 1238 break; 1239 1240 case INDEX_op_shl_i32: 1241 if (c2) { 1242 tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); 1243 } else { 1244 tcg_out_opc_sll_w(s, a0, a1, a2); 1245 } 1246 break; 1247 case INDEX_op_shl_i64: 1248 if (c2) { 1249 tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); 1250 } else { 1251 tcg_out_opc_sll_d(s, a0, a1, a2); 1252 } 1253 break; 1254 1255 case INDEX_op_shr_i32: 1256 if (c2) { 1257 tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); 1258 } else { 1259 tcg_out_opc_srl_w(s, a0, a1, a2); 1260 } 1261 break; 1262 case INDEX_op_shr_i64: 1263 if (c2) { 1264 tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); 1265 } else { 1266 tcg_out_opc_srl_d(s, a0, a1, a2); 1267 } 1268 break; 1269 1270 case INDEX_op_sar_i32: 1271 if (c2) { 1272 tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); 1273 } else { 1274 tcg_out_opc_sra_w(s, a0, a1, a2); 1275 } 1276 break; 1277 case INDEX_op_sar_i64: 1278 if (c2) { 1279 tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); 1280 } else { 1281 tcg_out_opc_sra_d(s, a0, a1, a2); 1282 } 1283 break; 1284 1285 case INDEX_op_rotl_i32: 1286 /* transform into equivalent rotr/rotri */ 1287 if (c2) { 1288 tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); 1289 } else { 1290 tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); 1291 tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); 1292 } 1293 break; 1294 case INDEX_op_rotl_i64: 1295 /* transform into equivalent rotr/rotri */ 1296 if (c2) { 1297 tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); 1298 } else { 1299 tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); 1300 tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); 1301 } 1302 break; 1303 1304 case INDEX_op_rotr_i32: 1305 if (c2) { 1306 tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); 1307 } else { 1308 tcg_out_opc_rotr_w(s, a0, a1, a2); 1309 } 1310 break; 1311 case INDEX_op_rotr_i64: 1312 if (c2) { 1313 tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); 1314 } else { 1315 tcg_out_opc_rotr_d(s, a0, a1, a2); 1316 } 1317 break; 1318 1319 case INDEX_op_add_i32: 1320 if (c2) { 1321 tcg_out_opc_addi_w(s, a0, a1, a2); 1322 } else { 1323 tcg_out_opc_add_w(s, a0, a1, a2); 1324 } 1325 break; 1326 case INDEX_op_add_i64: 1327 if (c2) { 1328 tcg_out_opc_addi_d(s, a0, a1, a2); 1329 } else { 1330 tcg_out_opc_add_d(s, a0, a1, a2); 1331 } 1332 break; 1333 1334 case INDEX_op_sub_i32: 1335 if (c2) { 1336 tcg_out_opc_addi_w(s, a0, a1, -a2); 1337 } else { 1338 tcg_out_opc_sub_w(s, a0, a1, a2); 1339 } 1340 break; 1341 case INDEX_op_sub_i64: 1342 if (c2) { 1343 tcg_out_opc_addi_d(s, a0, a1, -a2); 1344 } else { 1345 tcg_out_opc_sub_d(s, a0, a1, a2); 1346 } 1347 break; 1348 1349 case INDEX_op_mul_i32: 1350 tcg_out_opc_mul_w(s, a0, a1, a2); 1351 break; 1352 case INDEX_op_mul_i64: 1353 tcg_out_opc_mul_d(s, a0, a1, a2); 1354 break; 1355 1356 case INDEX_op_mulsh_i32: 1357 tcg_out_opc_mulh_w(s, a0, a1, a2); 1358 break; 1359 case INDEX_op_mulsh_i64: 1360 tcg_out_opc_mulh_d(s, a0, a1, a2); 1361 break; 1362 1363 case INDEX_op_muluh_i32: 1364 tcg_out_opc_mulh_wu(s, a0, a1, a2); 1365 break; 1366 case INDEX_op_muluh_i64: 1367 tcg_out_opc_mulh_du(s, a0, a1, a2); 1368 break; 1369 1370 case INDEX_op_div_i32: 1371 tcg_out_opc_div_w(s, a0, a1, a2); 1372 break; 1373 case INDEX_op_div_i64: 1374 tcg_out_opc_div_d(s, a0, a1, a2); 1375 break; 1376 1377 case INDEX_op_divu_i32: 1378 tcg_out_opc_div_wu(s, a0, a1, a2); 1379 break; 1380 case INDEX_op_divu_i64: 1381 tcg_out_opc_div_du(s, a0, a1, a2); 1382 break; 1383 1384 case INDEX_op_rem_i32: 1385 tcg_out_opc_mod_w(s, a0, a1, a2); 1386 break; 1387 case INDEX_op_rem_i64: 1388 tcg_out_opc_mod_d(s, a0, a1, a2); 1389 break; 1390 1391 case INDEX_op_remu_i32: 1392 tcg_out_opc_mod_wu(s, a0, a1, a2); 1393 break; 1394 case INDEX_op_remu_i64: 1395 tcg_out_opc_mod_du(s, a0, a1, a2); 1396 break; 1397 1398 case INDEX_op_setcond_i32: 1399 case INDEX_op_setcond_i64: 1400 tcg_out_setcond(s, args[3], a0, a1, a2, c2); 1401 break; 1402 1403 case INDEX_op_ld8s_i32: 1404 case INDEX_op_ld8s_i64: 1405 tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); 1406 break; 1407 case INDEX_op_ld8u_i32: 1408 case INDEX_op_ld8u_i64: 1409 tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); 1410 break; 1411 case INDEX_op_ld16s_i32: 1412 case INDEX_op_ld16s_i64: 1413 tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); 1414 break; 1415 case INDEX_op_ld16u_i32: 1416 case INDEX_op_ld16u_i64: 1417 tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); 1418 break; 1419 case INDEX_op_ld_i32: 1420 case INDEX_op_ld32s_i64: 1421 tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); 1422 break; 1423 case INDEX_op_ld32u_i64: 1424 tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); 1425 break; 1426 case INDEX_op_ld_i64: 1427 tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); 1428 break; 1429 1430 case INDEX_op_st8_i32: 1431 case INDEX_op_st8_i64: 1432 tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); 1433 break; 1434 case INDEX_op_st16_i32: 1435 case INDEX_op_st16_i64: 1436 tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); 1437 break; 1438 case INDEX_op_st_i32: 1439 case INDEX_op_st32_i64: 1440 tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); 1441 break; 1442 case INDEX_op_st_i64: 1443 tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); 1444 break; 1445 1446 case INDEX_op_qemu_ld_i32: 1447 tcg_out_qemu_ld(s, args, TCG_TYPE_I32); 1448 break; 1449 case INDEX_op_qemu_ld_i64: 1450 tcg_out_qemu_ld(s, args, TCG_TYPE_I64); 1451 break; 1452 case INDEX_op_qemu_st_i32: 1453 tcg_out_qemu_st(s, args); 1454 break; 1455 case INDEX_op_qemu_st_i64: 1456 tcg_out_qemu_st(s, args); 1457 break; 1458 1459 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 1460 case INDEX_op_mov_i64: 1461 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 1462 default: 1463 g_assert_not_reached(); 1464 } 1465} 1466 1467static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 1468{ 1469 switch (op) { 1470 case INDEX_op_goto_ptr: 1471 return C_O0_I1(r); 1472 1473 case INDEX_op_st8_i32: 1474 case INDEX_op_st8_i64: 1475 case INDEX_op_st16_i32: 1476 case INDEX_op_st16_i64: 1477 case INDEX_op_st32_i64: 1478 case INDEX_op_st_i32: 1479 case INDEX_op_st_i64: 1480 return C_O0_I2(rZ, r); 1481 1482 case INDEX_op_brcond_i32: 1483 case INDEX_op_brcond_i64: 1484 return C_O0_I2(rZ, rZ); 1485 1486 case INDEX_op_qemu_st_i32: 1487 case INDEX_op_qemu_st_i64: 1488 return C_O0_I2(LZ, L); 1489 1490 case INDEX_op_ext8s_i32: 1491 case INDEX_op_ext8s_i64: 1492 case INDEX_op_ext8u_i32: 1493 case INDEX_op_ext8u_i64: 1494 case INDEX_op_ext16s_i32: 1495 case INDEX_op_ext16s_i64: 1496 case INDEX_op_ext16u_i32: 1497 case INDEX_op_ext16u_i64: 1498 case INDEX_op_ext32s_i64: 1499 case INDEX_op_ext32u_i64: 1500 case INDEX_op_extu_i32_i64: 1501 case INDEX_op_extrl_i64_i32: 1502 case INDEX_op_extrh_i64_i32: 1503 case INDEX_op_ext_i32_i64: 1504 case INDEX_op_not_i32: 1505 case INDEX_op_not_i64: 1506 case INDEX_op_extract_i32: 1507 case INDEX_op_extract_i64: 1508 case INDEX_op_bswap16_i32: 1509 case INDEX_op_bswap16_i64: 1510 case INDEX_op_bswap32_i32: 1511 case INDEX_op_bswap32_i64: 1512 case INDEX_op_bswap64_i64: 1513 case INDEX_op_ld8s_i32: 1514 case INDEX_op_ld8s_i64: 1515 case INDEX_op_ld8u_i32: 1516 case INDEX_op_ld8u_i64: 1517 case INDEX_op_ld16s_i32: 1518 case INDEX_op_ld16s_i64: 1519 case INDEX_op_ld16u_i32: 1520 case INDEX_op_ld16u_i64: 1521 case INDEX_op_ld32s_i64: 1522 case INDEX_op_ld32u_i64: 1523 case INDEX_op_ld_i32: 1524 case INDEX_op_ld_i64: 1525 return C_O1_I1(r, r); 1526 1527 case INDEX_op_qemu_ld_i32: 1528 case INDEX_op_qemu_ld_i64: 1529 return C_O1_I1(r, L); 1530 1531 case INDEX_op_andc_i32: 1532 case INDEX_op_andc_i64: 1533 case INDEX_op_orc_i32: 1534 case INDEX_op_orc_i64: 1535 /* 1536 * LoongArch insns for these ops don't have reg-imm forms, but we 1537 * can express using andi/ori if ~constant satisfies 1538 * TCG_CT_CONST_U12. 1539 */ 1540 return C_O1_I2(r, r, rC); 1541 1542 case INDEX_op_shl_i32: 1543 case INDEX_op_shl_i64: 1544 case INDEX_op_shr_i32: 1545 case INDEX_op_shr_i64: 1546 case INDEX_op_sar_i32: 1547 case INDEX_op_sar_i64: 1548 case INDEX_op_rotl_i32: 1549 case INDEX_op_rotl_i64: 1550 case INDEX_op_rotr_i32: 1551 case INDEX_op_rotr_i64: 1552 return C_O1_I2(r, r, ri); 1553 1554 case INDEX_op_add_i32: 1555 case INDEX_op_add_i64: 1556 return C_O1_I2(r, r, rI); 1557 1558 case INDEX_op_and_i32: 1559 case INDEX_op_and_i64: 1560 case INDEX_op_nor_i32: 1561 case INDEX_op_nor_i64: 1562 case INDEX_op_or_i32: 1563 case INDEX_op_or_i64: 1564 case INDEX_op_xor_i32: 1565 case INDEX_op_xor_i64: 1566 /* LoongArch reg-imm bitops have their imms ZERO-extended */ 1567 return C_O1_I2(r, r, rU); 1568 1569 case INDEX_op_clz_i32: 1570 case INDEX_op_clz_i64: 1571 case INDEX_op_ctz_i32: 1572 case INDEX_op_ctz_i64: 1573 return C_O1_I2(r, r, rW); 1574 1575 case INDEX_op_setcond_i32: 1576 case INDEX_op_setcond_i64: 1577 return C_O1_I2(r, r, rZ); 1578 1579 case INDEX_op_deposit_i32: 1580 case INDEX_op_deposit_i64: 1581 /* Must deposit into the same register as input */ 1582 return C_O1_I2(r, 0, rZ); 1583 1584 case INDEX_op_sub_i32: 1585 case INDEX_op_sub_i64: 1586 return C_O1_I2(r, rZ, rN); 1587 1588 case INDEX_op_mul_i32: 1589 case INDEX_op_mul_i64: 1590 case INDEX_op_mulsh_i32: 1591 case INDEX_op_mulsh_i64: 1592 case INDEX_op_muluh_i32: 1593 case INDEX_op_muluh_i64: 1594 case INDEX_op_div_i32: 1595 case INDEX_op_div_i64: 1596 case INDEX_op_divu_i32: 1597 case INDEX_op_divu_i64: 1598 case INDEX_op_rem_i32: 1599 case INDEX_op_rem_i64: 1600 case INDEX_op_remu_i32: 1601 case INDEX_op_remu_i64: 1602 return C_O1_I2(r, rZ, rZ); 1603 1604 default: 1605 g_assert_not_reached(); 1606 } 1607} 1608 1609static const int tcg_target_callee_save_regs[] = { 1610 TCG_REG_S0, /* used for the global env (TCG_AREG0) */ 1611 TCG_REG_S1, 1612 TCG_REG_S2, 1613 TCG_REG_S3, 1614 TCG_REG_S4, 1615 TCG_REG_S5, 1616 TCG_REG_S6, 1617 TCG_REG_S7, 1618 TCG_REG_S8, 1619 TCG_REG_S9, 1620 TCG_REG_RA, /* should be last for ABI compliance */ 1621}; 1622 1623/* Stack frame parameters. */ 1624#define REG_SIZE (TCG_TARGET_REG_BITS / 8) 1625#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) 1626#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 1627#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ 1628 + TCG_TARGET_STACK_ALIGN - 1) \ 1629 & -TCG_TARGET_STACK_ALIGN) 1630#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) 1631 1632/* We're expecting to be able to use an immediate for frame allocation. */ 1633QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff); 1634 1635/* Generate global QEMU prologue and epilogue code */ 1636static void tcg_target_qemu_prologue(TCGContext *s) 1637{ 1638 int i; 1639 1640 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE); 1641 1642 /* TB prologue */ 1643 tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE); 1644 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 1645 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 1646 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 1647 } 1648 1649#if !defined(CONFIG_SOFTMMU) 1650 if (USE_GUEST_BASE) { 1651 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); 1652 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 1653 } 1654#endif 1655 1656 /* Call generated code */ 1657 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 1658 tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0); 1659 1660 /* Return path for goto_ptr. Set return value to 0 */ 1661 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 1662 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO); 1663 1664 /* TB epilogue */ 1665 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 1666 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 1667 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 1668 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 1669 } 1670 1671 tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE); 1672 tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0); 1673} 1674 1675static void tcg_target_init(TCGContext *s) 1676{ 1677 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; 1678 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; 1679 1680 tcg_target_call_clobber_regs = ALL_GENERAL_REGS; 1681 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0); 1682 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1); 1683 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2); 1684 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3); 1685 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4); 1686 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5); 1687 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6); 1688 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7); 1689 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8); 1690 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9); 1691 1692 s->reserved_regs = 0; 1693 tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); 1694 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 1695 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 1696 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 1697 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 1698 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP); 1699 tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED); 1700} 1701 1702typedef struct { 1703 DebugFrameHeader h; 1704 uint8_t fde_def_cfa[4]; 1705 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; 1706} DebugFrame; 1707 1708#define ELF_HOST_MACHINE EM_LOONGARCH 1709 1710static const DebugFrame debug_frame = { 1711 .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */ 1712 .h.cie.id = -1, 1713 .h.cie.version = 1, 1714 .h.cie.code_align = 1, 1715 .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */ 1716 .h.cie.return_column = TCG_REG_RA, 1717 1718 /* Total FDE size does not include the "len" member. */ 1719 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 1720 1721 .fde_def_cfa = { 1722 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 1723 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 1724 (FRAME_SIZE >> 7) 1725 }, 1726 .fde_reg_ofs = { 1727 0x80 + 23, 11, /* DW_CFA_offset, s0, -88 */ 1728 0x80 + 24, 10, /* DW_CFA_offset, s1, -80 */ 1729 0x80 + 25, 9, /* DW_CFA_offset, s2, -72 */ 1730 0x80 + 26, 8, /* DW_CFA_offset, s3, -64 */ 1731 0x80 + 27, 7, /* DW_CFA_offset, s4, -56 */ 1732 0x80 + 28, 6, /* DW_CFA_offset, s5, -48 */ 1733 0x80 + 29, 5, /* DW_CFA_offset, s6, -40 */ 1734 0x80 + 30, 4, /* DW_CFA_offset, s7, -32 */ 1735 0x80 + 31, 3, /* DW_CFA_offset, s8, -24 */ 1736 0x80 + 22, 2, /* DW_CFA_offset, s9, -16 */ 1737 0x80 + 1 , 1, /* DW_CFA_offset, ra, -8 */ 1738 } 1739}; 1740 1741void tcg_register_jit(const void *buf, size_t buf_size) 1742{ 1743 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 1744} 1745