1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name> 5 * 6 * Based on tcg/riscv/tcg-target.c.inc 7 * 8 * Copyright (c) 2018 SiFive, Inc 9 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> 10 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> 11 * Copyright (c) 2008 Fabrice Bellard 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this software and associated documentation files (the "Software"), to deal 15 * in the Software without restriction, including without limitation the rights 16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 * copies of the Software, and to permit persons to whom the Software is 18 * furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 * THE SOFTWARE. 
*/

#include "../tcg-ldst.c.inc"

#ifdef CONFIG_DEBUG_TCG
/*
 * Textual names of the host registers; only built when CONFIG_DEBUG_TCG is
 * defined.  The table has TCG_TARGET_NB_REGS entries.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "zero",
    "ra",
    "tp",
    "sp",
    "a0",
    "a1",
    "a2",
    "a3",
    "a4",
    "a5",
    "a6",
    "a7",
    "t0",
    "t1",
    "t2",
    "t3",
    "t4",
    "t5",
    "t6",
    "t7",
    "t8",
    "r21", /* reserved in the LP64* ABI, hence no ABI name */
    "s9",
    "s0",
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
    "s6",
    "s7",
    "s8"
};
#endif

/*
 * Register allocation order: call-preserved registers are listed first,
 * then the temporaries, and finally the argument registers in reverse.
 */
static const int tcg_target_reg_alloc_order[] = {
    /* Registers preserved across calls */
    /* TCG_REG_S0 reserved for TCG_AREG0 */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,

    /* Registers (potentially) clobbered across calls */
    TCG_REG_T0,
    TCG_REG_T1,
    TCG_REG_T2,
    TCG_REG_T3,
    TCG_REG_T4,
    TCG_REG_T5,
    TCG_REG_T6,
    TCG_REG_T7,
    TCG_REG_T8,

    /* Argument registers, opposite order of allocation.  */
    TCG_REG_A7,
    TCG_REG_A6,
    TCG_REG_A5,
    TCG_REG_A4,
    TCG_REG_A3,
    TCG_REG_A2,
    TCG_REG_A1,
    TCG_REG_A0,
};

/* Registers carrying integer call arguments, first argument first. */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,
};

/* Registers carrying call return values. */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
};

#ifndef CONFIG_SOFTMMU
/* User-mode only: a guest base register is used when guest_base is nonzero. */
#define USE_GUEST_BASE     (guest_base != 0)
#define TCG_GUEST_BASE_REG TCG_REG_S1
#endif

/*
 * Backend-specific constant constraint bits, tested against the `ct`
 * argument of tcg_target_const_match():
 *   ZERO - the value is 0
 *   S12  - the value fits a signed 12-bit immediate
 *   N12  - the negated value fits a signed 12-bit immediate
 *   U12  - the value is within 0..0xfff
 *   C12  - the bitwise complement of the value is within 0..0xfff
 *   WSZ  - the value equals the operation width (32 or 64)
 */
#define TCG_CT_CONST_ZERO  0x100
#define TCG_CT_CONST_S12   0x200
#define TCG_CT_CONST_N12   0x400
#define TCG_CT_CONST_U12   0x800
#define TCG_CT_CONST_C12   0x1000
#define TCG_CT_CONST_WSZ   0x2000

/* Mask with bits 0..31 set, i.e. one bit per general register. */
#define ALL_GENERAL_REGS      MAKE_64BIT_MASK(0, 32)
/*
 * For softmmu, we need to avoid conflicts with the first 5
 * argument registers to call the helper.
Some of these are 138 * also used for the tlb lookup. 139 */ 140#ifdef CONFIG_SOFTMMU 141#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) 142#else 143#define SOFTMMU_RESERVE_REGS 0 144#endif 145 146 147static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) 148{ 149 return sextract64(val, pos, len); 150} 151 152/* test if a constant matches the constraint */ 153static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 154{ 155 if (ct & TCG_CT_CONST) { 156 return true; 157 } 158 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 159 return true; 160 } 161 if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { 162 return true; 163 } 164 if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { 165 return true; 166 } 167 if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { 168 return true; 169 } 170 if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { 171 return true; 172 } 173 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { 174 return true; 175 } 176 return false; 177} 178 179/* 180 * Relocations 181 */ 182 183/* 184 * Relocation records defined in LoongArch ELF psABI v1.00 is way too 185 * complicated; a whopping stack machine is needed to stuff the fields, at 186 * the very least one SOP_PUSH and one SOP_POP (of the correct format) are 187 * needed. 188 * 189 * Hence, define our own simpler relocation types. Numbers are chosen as to 190 * not collide with potential future additions to the true ELF relocation 191 * type enum. 
192 */ 193 194/* Field Sk16, shifted right by 2; suitable for conditional jumps */ 195#define R_LOONGARCH_BR_SK16 256 196/* Field Sd10k16, shifted right by 2; suitable for B and BL */ 197#define R_LOONGARCH_BR_SD10K16 257 198 199static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 200{ 201 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 202 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 203 204 tcg_debug_assert((offset & 3) == 0); 205 offset >>= 2; 206 if (offset == sextreg(offset, 0, 16)) { 207 *src_rw = deposit64(*src_rw, 10, 16, offset); 208 return true; 209 } 210 211 return false; 212} 213 214static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, 215 const tcg_insn_unit *target) 216{ 217 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 218 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 219 220 tcg_debug_assert((offset & 3) == 0); 221 offset >>= 2; 222 if (offset == sextreg(offset, 0, 26)) { 223 *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ 224 *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ 225 return true; 226 } 227 228 return false; 229} 230 231static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 232 intptr_t value, intptr_t addend) 233{ 234 tcg_debug_assert(addend == 0); 235 switch (type) { 236 case R_LOONGARCH_BR_SK16: 237 return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); 238 case R_LOONGARCH_BR_SD10K16: 239 return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value); 240 default: 241 g_assert_not_reached(); 242 } 243} 244 245#include "tcg-insn-defs.c.inc" 246 247/* 248 * TCG intrinsics 249 */ 250 251static void tcg_out_mb(TCGContext *s, TCGArg a0) 252{ 253 /* Baseline LoongArch only has the full barrier, unfortunately. 
*/ 254 tcg_out_opc_dbar(s, 0); 255} 256 257static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 258{ 259 if (ret == arg) { 260 return true; 261 } 262 switch (type) { 263 case TCG_TYPE_I32: 264 case TCG_TYPE_I64: 265 /* 266 * Conventional register-register move used in LoongArch is 267 * `or dst, src, zero`. 268 */ 269 tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); 270 break; 271 default: 272 g_assert_not_reached(); 273 } 274 return true; 275} 276 277static bool imm_part_needs_loading(bool high_bits_are_ones, 278 tcg_target_long part) 279{ 280 if (high_bits_are_ones) { 281 return part != -1; 282 } else { 283 return part != 0; 284 } 285} 286 287/* Loads a 32-bit immediate into rd, sign-extended. */ 288static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) 289{ 290 tcg_target_long lo = sextreg(val, 0, 12); 291 tcg_target_long hi12 = sextreg(val, 12, 20); 292 293 /* Single-instruction cases. */ 294 if (lo == val) { 295 /* val fits in simm12: addi.w rd, zero, val */ 296 tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); 297 return; 298 } 299 if (0x800 <= val && val <= 0xfff) { 300 /* val fits in uimm12: ori rd, zero, val */ 301 tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); 302 return; 303 } 304 305 /* High bits must be set; load with lu12i.w + optional ori. */ 306 tcg_out_opc_lu12i_w(s, rd, hi12); 307 if (lo != 0) { 308 tcg_out_opc_ori(s, rd, rd, lo & 0xfff); 309 } 310} 311 312static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 313 tcg_target_long val) 314{ 315 /* 316 * LoongArch conventionally loads 64-bit immediates in at most 4 steps, 317 * with dedicated instructions for filling the respective bitfields 318 * below: 319 * 320 * 6 5 4 3 321 * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 322 * +-----------------------+---------------------------------------+... 323 * | hi52 | hi32 | 324 * +-----------------------+---------------------------------------+... 
325 * 3 2 1 326 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 327 * ...+-------------------------------------+-------------------------+ 328 * | hi12 | lo | 329 * ...+-------------------------------------+-------------------------+ 330 * 331 * Check if val belong to one of the several fast cases, before falling 332 * back to the slow path. 333 */ 334 335 intptr_t pc_offset; 336 tcg_target_long val_lo, val_hi, pc_hi, offset_hi; 337 tcg_target_long hi32, hi52; 338 bool rd_high_bits_are_ones; 339 340 /* Value fits in signed i32. */ 341 if (type == TCG_TYPE_I32 || val == (int32_t)val) { 342 tcg_out_movi_i32(s, rd, val); 343 return; 344 } 345 346 /* PC-relative cases. */ 347 pc_offset = tcg_pcrel_diff(s, (void *)val); 348 if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { 349 /* Single pcaddu2i. */ 350 tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); 351 return; 352 } 353 354 if (pc_offset == (int32_t)pc_offset) { 355 /* Offset within 32 bits; load with pcalau12i + ori. */ 356 val_lo = sextreg(val, 0, 12); 357 val_hi = val >> 12; 358 pc_hi = (val - pc_offset) >> 12; 359 offset_hi = val_hi - pc_hi; 360 361 tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); 362 tcg_out_opc_pcalau12i(s, rd, offset_hi); 363 if (val_lo != 0) { 364 tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); 365 } 366 return; 367 } 368 369 hi32 = sextreg(val, 32, 20); 370 hi52 = sextreg(val, 52, 12); 371 372 /* Single cu52i.d case. */ 373 if (ctz64(val) >= 52) { 374 tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); 375 return; 376 } 377 378 /* Slow path. Initialize the low 32 bits, then concat high bits. 
*/ 379 tcg_out_movi_i32(s, rd, val); 380 rd_high_bits_are_ones = (int32_t)val < 0; 381 382 if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { 383 tcg_out_opc_cu32i_d(s, rd, hi32); 384 rd_high_bits_are_ones = hi32 < 0; 385 } 386 387 if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { 388 tcg_out_opc_cu52i_d(s, rd, rd, hi52); 389 } 390} 391 392static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) 393{ 394 tcg_out_opc_andi(s, ret, arg, 0xff); 395} 396 397static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) 398{ 399 tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); 400} 401 402static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) 403{ 404 tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); 405} 406 407static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) 408{ 409 tcg_out_opc_sext_b(s, ret, arg); 410} 411 412static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) 413{ 414 tcg_out_opc_sext_h(s, ret, arg); 415} 416 417static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) 418{ 419 tcg_out_opc_addi_w(s, ret, arg, 0); 420} 421 422static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, 423 TCGReg a0, TCGReg a1, TCGReg a2, 424 bool c2, bool is_32bit) 425{ 426 if (c2) { 427 /* 428 * Fast path: semantics already satisfied due to constraint and 429 * insn behavior, single instruction is enough. 430 */ 431 tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); 432 /* all clz/ctz insns belong to DJ-format */ 433 tcg_out32(s, encode_dj_insn(opc, a0, a1)); 434 return; 435 } 436 437 tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); 438 /* a0 = a1 ? 
REG_TMP0 : a2 */ 439 tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); 440 tcg_out_opc_masknez(s, a0, a2, a1); 441 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); 442} 443 444static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, 445 TCGReg arg1, TCGReg arg2, bool c2) 446{ 447 TCGReg tmp; 448 449 if (c2) { 450 tcg_debug_assert(arg2 == 0); 451 } 452 453 switch (cond) { 454 case TCG_COND_EQ: 455 if (c2) { 456 tmp = arg1; 457 } else { 458 tcg_out_opc_sub_d(s, ret, arg1, arg2); 459 tmp = ret; 460 } 461 tcg_out_opc_sltui(s, ret, tmp, 1); 462 break; 463 case TCG_COND_NE: 464 if (c2) { 465 tmp = arg1; 466 } else { 467 tcg_out_opc_sub_d(s, ret, arg1, arg2); 468 tmp = ret; 469 } 470 tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); 471 break; 472 case TCG_COND_LT: 473 tcg_out_opc_slt(s, ret, arg1, arg2); 474 break; 475 case TCG_COND_GE: 476 tcg_out_opc_slt(s, ret, arg1, arg2); 477 tcg_out_opc_xori(s, ret, ret, 1); 478 break; 479 case TCG_COND_LE: 480 tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false); 481 break; 482 case TCG_COND_GT: 483 tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false); 484 break; 485 case TCG_COND_LTU: 486 tcg_out_opc_sltu(s, ret, arg1, arg2); 487 break; 488 case TCG_COND_GEU: 489 tcg_out_opc_sltu(s, ret, arg1, arg2); 490 tcg_out_opc_xori(s, ret, ret, 1); 491 break; 492 case TCG_COND_LEU: 493 tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false); 494 break; 495 case TCG_COND_GTU: 496 tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false); 497 break; 498 default: 499 g_assert_not_reached(); 500 break; 501 } 502} 503 504/* 505 * Branch helpers 506 */ 507 508static const struct { 509 LoongArchInsn op; 510 bool swap; 511} tcg_brcond_to_loongarch[] = { 512 [TCG_COND_EQ] = { OPC_BEQ, false }, 513 [TCG_COND_NE] = { OPC_BNE, false }, 514 [TCG_COND_LT] = { OPC_BGT, true }, 515 [TCG_COND_GE] = { OPC_BLE, true }, 516 [TCG_COND_LE] = { OPC_BLE, false }, 517 [TCG_COND_GT] = { OPC_BGT, false }, 518 [TCG_COND_LTU] = { OPC_BGTU, true }, 519 
[TCG_COND_GEU] = { OPC_BLEU, true }, 520 [TCG_COND_LEU] = { OPC_BLEU, false }, 521 [TCG_COND_GTU] = { OPC_BGTU, false } 522}; 523 524static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, 525 TCGReg arg2, TCGLabel *l) 526{ 527 LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; 528 529 tcg_debug_assert(op != 0); 530 531 if (tcg_brcond_to_loongarch[cond].swap) { 532 TCGReg t = arg1; 533 arg1 = arg2; 534 arg2 = t; 535 } 536 537 /* all conditional branch insns belong to DJSk16-format */ 538 tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0); 539 tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); 540} 541 542static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) 543{ 544 TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; 545 ptrdiff_t offset = tcg_pcrel_diff(s, arg); 546 547 tcg_debug_assert((offset & 3) == 0); 548 if (offset == sextreg(offset, 0, 28)) { 549 /* short jump: +/- 256MiB */ 550 if (tail) { 551 tcg_out_opc_b(s, offset >> 2); 552 } else { 553 tcg_out_opc_bl(s, offset >> 2); 554 } 555 } else if (offset == sextreg(offset, 0, 38)) { 556 /* long jump: +/- 256GiB */ 557 tcg_target_long lo = sextreg(offset, 0, 18); 558 tcg_target_long hi = offset - lo; 559 tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); 560 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 561 } else { 562 /* far jump: 64-bit */ 563 tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); 564 tcg_target_long hi = (tcg_target_long)arg - lo; 565 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); 566 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 567 } 568} 569 570static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) 571{ 572 tcg_out_call_int(s, arg, false); 573} 574 575/* 576 * Load/store helpers 577 */ 578 579static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, 580 TCGReg addr, intptr_t offset) 581{ 582 intptr_t imm12 = sextreg(offset, 0, 12); 583 584 if (offset != imm12) { 585 intptr_t diff = offset - 
(uintptr_t)s->code_ptr; 586 587 if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { 588 imm12 = sextreg(diff, 0, 12); 589 tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); 590 } else { 591 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); 592 if (addr != TCG_REG_ZERO) { 593 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); 594 } 595 } 596 addr = TCG_REG_TMP2; 597 } 598 599 switch (opc) { 600 case OPC_LD_B: 601 case OPC_LD_BU: 602 case OPC_LD_H: 603 case OPC_LD_HU: 604 case OPC_LD_W: 605 case OPC_LD_WU: 606 case OPC_LD_D: 607 case OPC_ST_B: 608 case OPC_ST_H: 609 case OPC_ST_W: 610 case OPC_ST_D: 611 tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); 612 break; 613 default: 614 g_assert_not_reached(); 615 } 616} 617 618static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, 619 TCGReg arg1, intptr_t arg2) 620{ 621 bool is_32bit = type == TCG_TYPE_I32; 622 tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); 623} 624 625static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 626 TCGReg arg1, intptr_t arg2) 627{ 628 bool is_32bit = type == TCG_TYPE_I32; 629 tcg_out_ldst(s, is_32bit ? 
OPC_ST_W : OPC_ST_D, arg, arg1, arg2); 630} 631 632static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 633 TCGReg base, intptr_t ofs) 634{ 635 if (val == 0) { 636 tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); 637 return true; 638 } 639 return false; 640} 641 642/* 643 * Load/store helpers for SoftMMU, and qemu_ld/st implementations 644 */ 645 646#if defined(CONFIG_SOFTMMU) 647/* 648 * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 649 * MemOpIdx oi, uintptr_t ra) 650 */ 651static void * const qemu_ld_helpers[4] = { 652 [MO_8] = helper_ret_ldub_mmu, 653 [MO_16] = helper_le_lduw_mmu, 654 [MO_32] = helper_le_ldul_mmu, 655 [MO_64] = helper_le_ldq_mmu, 656}; 657 658/* 659 * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 660 * uintxx_t val, MemOpIdx oi, 661 * uintptr_t ra) 662 */ 663static void * const qemu_st_helpers[4] = { 664 [MO_8] = helper_ret_stb_mmu, 665 [MO_16] = helper_le_stw_mmu, 666 [MO_32] = helper_le_stl_mmu, 667 [MO_64] = helper_le_stq_mmu, 668}; 669 670/* We expect to use a 12-bit negative offset from ENV. */ 671QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 672QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); 673 674static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 675{ 676 tcg_out_opc_b(s, 0); 677 return reloc_br_sd10k16(s->code_ptr - 1, target); 678} 679 680/* 681 * Emits common code for TLB addend lookup, that eventually loads the 682 * addend in TCG_REG_TMP2. 
683 */ 684static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi, 685 tcg_insn_unit **label_ptr, bool is_load) 686{ 687 MemOp opc = get_memop(oi); 688 unsigned s_bits = opc & MO_SIZE; 689 unsigned a_bits = get_alignment_bits(opc); 690 tcg_target_long compare_mask; 691 int mem_index = get_mmuidx(oi); 692 int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); 693 int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); 694 int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); 695 696 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); 697 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); 698 699 tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl, 700 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 701 tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); 702 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); 703 704 /* Load the tlb comparator and the addend. */ 705 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, 706 is_load ? offsetof(CPUTLBEntry, addr_read) 707 : offsetof(CPUTLBEntry, addr_write)); 708 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, 709 offsetof(CPUTLBEntry, addend)); 710 711 /* We don't support unaligned accesses. */ 712 if (a_bits < s_bits) { 713 a_bits = s_bits; 714 } 715 /* Clear the non-page, non-alignment bits from the address. */ 716 compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); 717 tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); 718 tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl); 719 720 /* Compare masked address with the TLB entry. */ 721 label_ptr[0] = s->code_ptr; 722 tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); 723 724 /* TLB Hit - addend in TCG_REG_TMP2, ready for use. 
*/ 725} 726 727static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, 728 TCGType type, 729 TCGReg datalo, TCGReg addrlo, 730 void *raddr, tcg_insn_unit **label_ptr) 731{ 732 TCGLabelQemuLdst *label = new_ldst_label(s); 733 734 label->is_ld = is_ld; 735 label->oi = oi; 736 label->type = type; 737 label->datalo_reg = datalo; 738 label->datahi_reg = 0; /* unused */ 739 label->addrlo_reg = addrlo; 740 label->addrhi_reg = 0; /* unused */ 741 label->raddr = tcg_splitwx_to_rx(raddr); 742 label->label_ptr[0] = label_ptr[0]; 743} 744 745static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 746{ 747 MemOpIdx oi = l->oi; 748 MemOp opc = get_memop(oi); 749 MemOp size = opc & MO_SIZE; 750 TCGType type = l->type; 751 752 /* resolve label address */ 753 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 754 return false; 755 } 756 757 /* call load helper */ 758 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 759 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); 760 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); 761 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); 762 763 tcg_out_call(s, qemu_ld_helpers[size]); 764 765 switch (opc & MO_SSIZE) { 766 case MO_SB: 767 tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0); 768 break; 769 case MO_SW: 770 tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0); 771 break; 772 case MO_SL: 773 tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); 774 break; 775 case MO_UL: 776 if (type == TCG_TYPE_I32) { 777 /* MO_UL loads of i32 should be sign-extended too */ 778 tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); 779 break; 780 } 781 /* fallthrough */ 782 default: 783 tcg_out_mov(s, type, l->datalo_reg, TCG_REG_A0); 784 break; 785 } 786 787 return tcg_out_goto(s, l->raddr); 788} 789 790static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 791{ 792 MemOpIdx oi = l->oi; 793 MemOp opc = get_memop(oi); 794 MemOp size = opc & MO_SIZE; 795 796 /* resolve label address 
*/ 797 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 798 return false; 799 } 800 801 /* call store helper */ 802 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 803 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); 804 switch (size) { 805 case MO_8: 806 tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg); 807 break; 808 case MO_16: 809 tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg); 810 break; 811 case MO_32: 812 tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg); 813 break; 814 case MO_64: 815 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, l->datalo_reg); 816 break; 817 default: 818 g_assert_not_reached(); 819 break; 820 } 821 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); 822 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); 823 824 tcg_out_call(s, qemu_st_helpers[size]); 825 826 return tcg_out_goto(s, l->raddr); 827} 828#else 829 830/* 831 * Alignment helpers for user-mode emulation 832 */ 833 834static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, 835 unsigned a_bits) 836{ 837 TCGLabelQemuLdst *l = new_ldst_label(s); 838 839 l->is_ld = is_ld; 840 l->addrlo_reg = addr_reg; 841 842 /* 843 * Without micro-architecture details, we don't know which of bstrpick or 844 * andi is faster, so use bstrpick as it's not constrained by imm field 845 * width. 
(Not to say alignments >= 2^12 are going to happen any time 846 * soon, though) 847 */ 848 tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); 849 850 l->label_ptr[0] = s->code_ptr; 851 tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); 852 853 l->raddr = tcg_splitwx_to_rx(s->code_ptr); 854} 855 856static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 857{ 858 /* resolve label address */ 859 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 860 return false; 861 } 862 863 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); 864 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); 865 866 /* tail call, with the return address back inline. */ 867 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); 868 tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld 869 : helper_unaligned_st), true); 870 return true; 871} 872 873static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 874{ 875 return tcg_out_fail_alignment(s, l); 876} 877 878static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 879{ 880 return tcg_out_fail_alignment(s, l); 881} 882 883#endif /* CONFIG_SOFTMMU */ 884 885/* 886 * `ext32u` the address register into the temp register given, 887 * if target is 32-bit, no-op otherwise. 888 * 889 * Returns the address register ready for use with TLB addend. 890 */ 891static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s, 892 TCGReg addr, TCGReg tmp) 893{ 894 if (TARGET_LONG_BITS == 32) { 895 tcg_out_ext32u(s, tmp, addr); 896 return tmp; 897 } 898 return addr; 899} 900 901static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj, 902 TCGReg rk, MemOp opc, TCGType type) 903{ 904 /* Byte swapping is left to middle-end expansion. 
*/ 905 tcg_debug_assert((opc & MO_BSWAP) == 0); 906 907 switch (opc & MO_SSIZE) { 908 case MO_UB: 909 tcg_out_opc_ldx_bu(s, rd, rj, rk); 910 break; 911 case MO_SB: 912 tcg_out_opc_ldx_b(s, rd, rj, rk); 913 break; 914 case MO_UW: 915 tcg_out_opc_ldx_hu(s, rd, rj, rk); 916 break; 917 case MO_SW: 918 tcg_out_opc_ldx_h(s, rd, rj, rk); 919 break; 920 case MO_UL: 921 if (type == TCG_TYPE_I64) { 922 tcg_out_opc_ldx_wu(s, rd, rj, rk); 923 break; 924 } 925 /* fallthrough */ 926 case MO_SL: 927 tcg_out_opc_ldx_w(s, rd, rj, rk); 928 break; 929 case MO_UQ: 930 tcg_out_opc_ldx_d(s, rd, rj, rk); 931 break; 932 default: 933 g_assert_not_reached(); 934 } 935} 936 937static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) 938{ 939 TCGReg addr_regl; 940 TCGReg data_regl; 941 MemOpIdx oi; 942 MemOp opc; 943#if defined(CONFIG_SOFTMMU) 944 tcg_insn_unit *label_ptr[1]; 945#else 946 unsigned a_bits; 947#endif 948 TCGReg base; 949 950 data_regl = *args++; 951 addr_regl = *args++; 952 oi = *args++; 953 opc = get_memop(oi); 954 955#if defined(CONFIG_SOFTMMU) 956 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1); 957 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 958 tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type); 959 add_qemu_ldst_label(s, 1, oi, type, 960 data_regl, addr_regl, 961 s->code_ptr, label_ptr); 962#else 963 a_bits = get_alignment_bits(opc); 964 if (a_bits) { 965 tcg_out_test_alignment(s, true, addr_regl, a_bits); 966 } 967 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 968 TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 969 tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type); 970#endif 971} 972 973static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data, 974 TCGReg rj, TCGReg rk, MemOp opc) 975{ 976 /* Byte swapping is left to middle-end expansion. 
*/ 977 tcg_debug_assert((opc & MO_BSWAP) == 0); 978 979 switch (opc & MO_SIZE) { 980 case MO_8: 981 tcg_out_opc_stx_b(s, data, rj, rk); 982 break; 983 case MO_16: 984 tcg_out_opc_stx_h(s, data, rj, rk); 985 break; 986 case MO_32: 987 tcg_out_opc_stx_w(s, data, rj, rk); 988 break; 989 case MO_64: 990 tcg_out_opc_stx_d(s, data, rj, rk); 991 break; 992 default: 993 g_assert_not_reached(); 994 } 995} 996 997static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) 998{ 999 TCGReg addr_regl; 1000 TCGReg data_regl; 1001 MemOpIdx oi; 1002 MemOp opc; 1003#if defined(CONFIG_SOFTMMU) 1004 tcg_insn_unit *label_ptr[1]; 1005#else 1006 unsigned a_bits; 1007#endif 1008 TCGReg base; 1009 1010 data_regl = *args++; 1011 addr_regl = *args++; 1012 oi = *args++; 1013 opc = get_memop(oi); 1014 1015#if defined(CONFIG_SOFTMMU) 1016 tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0); 1017 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 1018 tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc); 1019 add_qemu_ldst_label(s, 0, oi, 1020 0, /* type param is unused for stores */ 1021 data_regl, addr_regl, 1022 s->code_ptr, label_ptr); 1023#else 1024 a_bits = get_alignment_bits(opc); 1025 if (a_bits) { 1026 tcg_out_test_alignment(s, false, addr_regl, a_bits); 1027 } 1028 base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); 1029 TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 1030 tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc); 1031#endif 1032} 1033 1034/* LoongArch uses `andi zero, zero, 0` as NOP. 
*/
/*
 * Instruction word used as padding / filler.  It is the bare OPC_ANDI
 * opcode constant with no operand fields OR-ed in.
 * NOTE(review): presumably this decodes as "andi $zero, $zero, 0" --
 * confirm against the definition of OPC_ANDI.
 */
#define NOP OPC_ANDI

/* Emit a single NOP instruction word into the code stream of @s. */
static void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, NOP);
}

/*
 * Patch the two-instruction jump slot of a goto_tb so that it branches
 * to @addr.
 *
 * @tc_ptr: not used by this implementation.
 * @jmp_rx: address of the slot as seen by executing code; the branch
 *          displacement is computed relative to it.
 * @jmp_rw: writable address of the same slot; receives the new pair.
 * @addr:   new branch destination.
 *
 * The displacement is expressed in units of 4 bytes (byte distance
 * shifted right by 2).  If it passes the sextreg(offset, 0, 26) check
 * the slot becomes "B offset; NOP".  Otherwise it must pass the 36-bit
 * check and the slot becomes "PCADDU18I tmp0, upper; JIRL zero, tmp0,
 * lower", where lower is the displacement truncated to int16_t and
 * upper is the remainder shifted right by 16.
 * NOTE(review): sextreg() is defined outside this chunk; presumably it
 * sign-extends the given bit-field -- confirm.
 *
 * Both instruction words are combined into one 64-bit value (first
 * instruction in the low half) and stored with a single qatomic_set(),
 * then the 8-byte range is passed to flush_idcache_range().  The
 * 64-bit store relies on the slot being 8-byte aligned, which the
 * INDEX_op_goto_tb case in tcg_out_op() arranges by padding with a NOP.
 */
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    ptrdiff_t upper, lower;
    ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;

    if (offset == sextreg(offset, 0, 26)) {
        /* Near target: one direct branch, second word is filler. */
        i1 = encode_sd10k16_insn(OPC_B, offset);
        i2 = NOP;
    } else {
        tcg_debug_assert(offset == sextreg(offset, 0, 36));
        /*
         * lower is taken as a signed 16-bit value, so it is subtracted
         * from offset before shifting to compensate in the upper part.
         */
        lower = (int16_t)offset;
        upper = (offset - lower) >> 16;

        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
    }
    /* Publish both words at once: i1 in bits 0..31, i2 in bits 32..63. */
    uint64_t pair = ((uint64_t)i2 << 32) | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}

/*
 * Entry-points
 */

/*
 * Executable address of the TB epilogue (the callee-saved register
 * restore sequence); assigned in tcg_target_qemu_prologue() and used
 * by the INDEX_op_exit_tb case below for non-zero return values.
 */
static const tcg_insn_unit *tb_ret_addr;

/*
 * Emit host instructions for a single TCG opcode.
 *
 * @s:          code generation context.
 * @opc:        opcode to emit.
 * @args:       operand array; args[0..2] are cached in a0/a1/a2, and
 *              some cases additionally read args[3] and args[4].
 * @const_args: per-operand flags; only const_args[2] is consulted here
 *              (cached in c2) to choose between the immediate and the
 *              register form of an instruction.
 *
 * INDEX_op_mov_*, INDEX_op_call and any opcode not listed in the switch
 * reach g_assert_not_reached().
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_exit_tb:
        /*
         * Reuse the zeroing that exists for goto_ptr: a zero return
         * value goes through tcg_code_gen_epilogue, which the prologue
         * generator places on the instruction that clears A0 right
         * before the epilogue proper.
         */
        if (a0 == 0) {
            tcg_out_call_int(s, tcg_code_gen_epilogue, true);
        } else {
            /* Load the return value into A0, then enter the epilogue. */
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
            tcg_out_call_int(s, tb_ret_addr, true);
        }
        break;

    case INDEX_op_goto_tb:
        tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
        /*
         * Ensure that patch area is 8-byte aligned so that an
         * atomic write can be used to patch the target address.
         */
        if ((uintptr_t)s->code_ptr & 7) {
            tcg_out_nop(s);
        }
        /* Record where the patchable two-instruction slot starts. */
        s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
        /*
         * actual branch destination will be patched by
         * tb_target_set_jmp_target later
         */
        tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_goto_ptr:
        /* Indirect jump through register a0, link register discarded. */
        tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        /* Register a relocation for the label, then emit B with offset 0. */
        tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0),
                      0);
        tcg_out_opc_b(s, 0);
        break;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        /* Operands are (lhs, rhs, cond, label); the helper takes cond first. */
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, a0, a1);
        break;

    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, a0, a1);
        break;

    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, a0, a1);
        break;

    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, a0, a1);
        break;

    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
        tcg_out_ext32u(s, a0, a1);
        break;

    case INDEX_op_ext32s_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext_i32_i64:
        tcg_out_ext32s(s, a0, a1);
        break;

    case INDEX_op_extrh_i64_i32:
        /* High half: arithmetic shift right of the 64-bit value by 32. */
        tcg_out_opc_srai_d(s, a0, a1, 32);
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        /* NOT is emitted as NOR with the zero register. */
        tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
        break;

    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        if (c2) {
            /* Constant operand: OR-immediate, then invert via NOR zero. */
            tcg_out_opc_ori(s, a0, a1, a2);
            tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO);
        } else {
            tcg_out_opc_nor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_andi(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_andn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_ori(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_orn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_andi(s, a0, a1, a2);
        } else {
            tcg_out_opc_and(s, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
        } else {
            tcg_out_opc_or(s, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_xori(s, a0, a1, a2);
        } else {
            tcg_out_opc_xor(s, a0, a1, a2);
        }
        break;

    /*
     * extract: a2 is the starting bit and args[3] the field length; the
     * instruction is given the (start + length - 1, start) bit pair.
     */
    case INDEX_op_extract_i32:
        tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
        break;
    case INDEX_op_extract_i64:
        tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
        break;

    /*
     * deposit: a1 is not passed to the instruction because the
     * constraint set ties it to the output register a0; a2 is the value
     * inserted, args[3] the starting bit and args[4] the field length.
     */
    case INDEX_op_deposit_i32:
        tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;
    case INDEX_op_deposit_i64:
        tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        tcg_out_opc_revb_2h(s, a0, a1);
        /*
         * a2 carries the TCG_BSWAP_* flags.  Sign-extend when OS is
         * requested; zero-extend only when OZ is requested and the
         * input is not flagged IZ.
         */
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap32_i32:
        /* All 32-bit values are computed sign-extended in the register.  */
        a2 = TCG_BSWAP_OS;
        /* fallthrough */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_revb_2w(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_opc_revb_d(s, a0, a1);
        break;

    /* clz/ctz: the trailing flag is true for the 32-bit variants. */
    case INDEX_op_clz_i32:
        tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_clz_i64:
        tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_ctz_i32:
        tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
        break;

    /*
     * Shifts and rotates: constant counts are masked to the operand
     * width (0x1f for 32-bit, 0x3f for 64-bit) before being encoded.
     */
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sll_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sll_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_srl_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_srl_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sra_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sra_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i32:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
        } else {
            /* TMP0 = 0 - count, then rotate right by TMP0. */
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
        }
        break;
    case INDEX_op_rotl_i64:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
        } else {
            /*
             * NOTE(review): the count is negated with the 32-bit sub_w
             * even for the 64-bit rotate; presumably fine because the
             * rotate only consumes the low bits of the count -- confirm.
             */
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_rotr_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_rotr_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_opc_addi_w(s, a0, a1, a2);
        } else {
            tcg_out_opc_add_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_opc_addi_d(s, a0, a1, a2);
        } else {
            tcg_out_opc_add_d(s, a0, a1, a2);
        }
        break;

    /* Subtracting a constant is emitted as adding its negation. */
    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_opc_addi_w(s, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_opc_addi_d(s, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_mul_w(s, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_mul_d(s, a0, a1, a2);
        break;

    case INDEX_op_mulsh_i32:
        tcg_out_opc_mulh_w(s, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_opc_mulh_d(s, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
        tcg_out_opc_mulh_wu(s, a0, a1, a2);
        break;
    case INDEX_op_muluh_i64:
        tcg_out_opc_mulh_du(s, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_div_w(s, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_div_d(s, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_div_wu(s, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_div_du(s, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_mod_w(s, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_mod_d(s, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_mod_wu(s, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_mod_du(s, a0, a1, a2);
        break;

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        /* Operands are (ret, lhs, rhs, cond); the helper takes cond first. */
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    /* Host loads: a0 = data register, a1 = base register, a2 = offset. */
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LD_H, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LD_W, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD_D, a0, a1, a2);
        break;

    /* Host stores: same operand layout as the loads above. */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_ST_B, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_ST_H, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_ST_W, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
        break;

    /*
     * Guest memory accesses are delegated to helpers that receive the
     * whole operand array; only the load helper is told the value type.
     */
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }
}

/*
 * Return the operand constraint set for opcode @op.
 *
 * C_Ox_Iy(...) names a set with x outputs and y inputs, outputs listed
 * first.  Any opcode not listed reaches g_assert_not_reached().
 *
 * NOTE(review): the meaning of the constraint letters is defined
 * outside this chunk.  Presumably 'r' is any general register, 'L' the
 * restricted register class for guest memory ops, 'Z' the constant
 * zero, 'i' any immediate and I/N/U/C/W the TCG_CT_CONST_S12/N12/U12/
 * C12/WSZ classes -- confirm against the constraint-string definitions.
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(rZ, rZ);

    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(LZ, L);

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld_i64:
        return C_O1_I1(r, r);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, L);

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        /*
         * LoongArch insns for these ops don't have reg-imm forms, but we
         * can express using andi/ori if ~constant satisfies
         * TCG_CT_CONST_U12.
         */
        return C_O1_I2(r, r, rC);

    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rI);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        /* LoongArch reg-imm bitops have their imms ZERO-extended */
        return C_O1_I2(r, r, rU);

    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rW);

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rZ);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        /* Must deposit into the same register as input */
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rZ, rN);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_mulsh_i32:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i32:
    case INDEX_op_muluh_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
        return C_O1_I2(r, rZ, rZ);

    default:
        g_assert_not_reached();
    }
}

/*
 * Registers saved by the prologue and restored by the epilogue.  Entry
 * i lives at stack offset SAVE_OFS + i * REG_SIZE, so the order of this
 * array fixes the layout of the save area; the .fde_reg_ofs table in
 * debug_frame below is written by hand against this order.
 */
static const int tcg_target_callee_save_regs[] = {
    TCG_REG_S0,     /* used for the global env (TCG_AREG0) */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_RA,     /* should be last for ABI compliance */
};

/*
 * Stack frame parameters.
 *
 * REG_SIZE   - bytes per saved register.
 * SAVE_SIZE  - bytes needed for all of tcg_target_callee_save_regs.
 * TEMP_SIZE  - bytes of the TCG temporary buffer.
 * FRAME_SIZE - static call args + temp buffer + save area, rounded up
 *              to a multiple of TCG_TARGET_STACK_ALIGN.
 * SAVE_OFS   - offset of the save area from the stack pointer: it
 *              follows the static call args and the temp buffer.
 */
#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
                     + TCG_TARGET_STACK_ALIGN - 1) \
                    & -TCG_TARGET_STACK_ALIGN)
#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)

/* We're expecting to be able to use an immediate for frame allocation.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);

/*
 * Generate global QEMU prologue and epilogue code.
 *
 * Emitted sequence:
 *   1. allocate FRAME_SIZE bytes of stack and store every register of
 *      tcg_target_callee_save_regs into the save area;
 *   2. for non-softmmu builds with a non-zero guest_base, load it into
 *      TCG_GUEST_BASE_REG and mark that register reserved;
 *   3. copy the first call argument into TCG_AREG0 and jump to the
 *      address held in the second call argument;
 *   4. at tcg_code_gen_epilogue: clear A0, then fall through to
 *   5. tb_ret_addr: reload the saved registers, release the frame and
 *      jump to RA.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

    /* The temp buffer sits just above the static call argument area. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);

    /* TB prologue */
    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* Call generated code */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);

    /* Return path for goto_ptr. Set return value to 0 */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}

/*
 * One-time backend initialisation of the register sets:
 *   - every general register is available for both I32 and I64;
 *   - every general register except S0..S9 is call-clobbered;
 *   - ZERO, TMP0..TMP2, SP, TP and TCG_REG_RESERVED are never handed
 *     out by the register allocator.
 */
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

    /* Start from "everything clobbered" and carve out S0..S9. */
    tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
}

/*
 * Layout of the unwind record handed to tcg_register_jit_int(): the
 * common CIE/FDE header followed by the raw call-frame instructions.
 *
 * fde_def_cfa: 4 bytes - opcode, register, two-byte uleb128 offset.
 * fde_reg_ofs: 2 bytes (opcode|register, factored offset) per entry of
 *              tcg_target_callee_save_regs.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
} DebugFrame;

#define ELF_HOST_MACHINE EM_LOONGARCH

/*
 * Static unwind description of the frame built by
 * tcg_target_qemu_prologue().
 *
 * The CFA offset is encoded as a fixed two-byte uleb128; this is
 * sufficient because the build-time check above limits FRAME_SIZE to
 * 0x7ff, so FRAME_SIZE >> 7 fits in the final byte without a
 * continuation bit.
 *
 * Each fde_reg_ofs pair is (0x80 + register number, N), recording the
 * register as saved at CFA + N * data_align, i.e. CFA - 8 * N.
 *
 * NOTE(review): the N values below put ra at CFA-8 up to s0 at CFA-88.
 * That matches the prologue only if the save area ends exactly at the
 * top of the frame (SAVE_OFS + SAVE_SIZE == FRAME_SIZE).  FRAME_SIZE is
 * rounded up to TCG_TARGET_STACK_ALIGN, so any alignment padding would
 * shift the real slots relative to the CFA -- confirm against the
 * actual values of the frame constants.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
    .h.cie.return_column = TCG_REG_RA,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 23, 11,                  /* DW_CFA_offset, s0, -88 */
        0x80 + 24, 10,                  /* DW_CFA_offset, s1, -80 */
        0x80 + 25, 9,                   /* DW_CFA_offset, s2, -72 */
        0x80 + 26, 8,                   /* DW_CFA_offset, s3, -64 */
        0x80 + 27, 7,                   /* DW_CFA_offset, s4, -56 */
        0x80 + 28, 6,                   /* DW_CFA_offset, s5, -48 */
        0x80 + 29, 5,                   /* DW_CFA_offset, s6, -40 */
        0x80 + 30, 4,                   /* DW_CFA_offset, s7, -32 */
        0x80 + 31, 3,                   /* DW_CFA_offset, s8, -24 */
        0x80 + 22, 2,                   /* DW_CFA_offset, s9, -16 */
        0x80 + 1 , 1,                   /* DW_CFA_offset, ra, -8 */
    }
};

/*
 * Register the generated-code buffer [@buf, @buf + @buf_size) together
 * with the static debug_frame record above by forwarding both to
 * tcg_register_jit_int().
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}