/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
 *
 * Based on tcg/riscv/tcg-target.c.inc
 *
 * Copyright (c) 2018 SiFive, Inc
 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
30 */ 31 32#include "../tcg-ldst.c.inc" 33#include <asm/hwcap.h> 34 35#ifdef CONFIG_DEBUG_TCG 36static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 37 "zero", 38 "ra", 39 "tp", 40 "sp", 41 "a0", 42 "a1", 43 "a2", 44 "a3", 45 "a4", 46 "a5", 47 "a6", 48 "a7", 49 "t0", 50 "t1", 51 "t2", 52 "t3", 53 "t4", 54 "t5", 55 "t6", 56 "t7", 57 "t8", 58 "r21", /* reserved in the LP64* ABI, hence no ABI name */ 59 "s9", 60 "s0", 61 "s1", 62 "s2", 63 "s3", 64 "s4", 65 "s5", 66 "s6", 67 "s7", 68 "s8" 69}; 70#endif 71 72static const int tcg_target_reg_alloc_order[] = { 73 /* Registers preserved across calls */ 74 /* TCG_REG_S0 reserved for TCG_AREG0 */ 75 TCG_REG_S1, 76 TCG_REG_S2, 77 TCG_REG_S3, 78 TCG_REG_S4, 79 TCG_REG_S5, 80 TCG_REG_S6, 81 TCG_REG_S7, 82 TCG_REG_S8, 83 TCG_REG_S9, 84 85 /* Registers (potentially) clobbered across calls */ 86 TCG_REG_T0, 87 TCG_REG_T1, 88 TCG_REG_T2, 89 TCG_REG_T3, 90 TCG_REG_T4, 91 TCG_REG_T5, 92 TCG_REG_T6, 93 TCG_REG_T7, 94 TCG_REG_T8, 95 96 /* Argument registers, opposite order of allocation. 
*/ 97 TCG_REG_A7, 98 TCG_REG_A6, 99 TCG_REG_A5, 100 TCG_REG_A4, 101 TCG_REG_A3, 102 TCG_REG_A2, 103 TCG_REG_A1, 104 TCG_REG_A0, 105}; 106 107static const int tcg_target_call_iarg_regs[] = { 108 TCG_REG_A0, 109 TCG_REG_A1, 110 TCG_REG_A2, 111 TCG_REG_A3, 112 TCG_REG_A4, 113 TCG_REG_A5, 114 TCG_REG_A6, 115 TCG_REG_A7, 116}; 117 118static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 119{ 120 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 121 tcg_debug_assert(slot >= 0 && slot <= 1); 122 return TCG_REG_A0 + slot; 123} 124 125#ifndef CONFIG_SOFTMMU 126#define USE_GUEST_BASE (guest_base != 0) 127#define TCG_GUEST_BASE_REG TCG_REG_S1 128#endif 129 130#define TCG_CT_CONST_ZERO 0x100 131#define TCG_CT_CONST_S12 0x200 132#define TCG_CT_CONST_S32 0x400 133#define TCG_CT_CONST_U12 0x800 134#define TCG_CT_CONST_C12 0x1000 135#define TCG_CT_CONST_WSZ 0x2000 136 137#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) 138 139static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) 140{ 141 return sextract64(val, pos, len); 142} 143 144/* test if a constant matches the constraint */ 145static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 146{ 147 if (ct & TCG_CT_CONST) { 148 return true; 149 } 150 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 151 return true; 152 } 153 if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { 154 return true; 155 } 156 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { 157 return true; 158 } 159 if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { 160 return true; 161 } 162 if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { 163 return true; 164 } 165 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 
32 : 64)) { 166 return true; 167 } 168 return false; 169} 170 171/* 172 * Relocations 173 */ 174 175/* 176 * Relocation records defined in LoongArch ELF psABI v1.00 is way too 177 * complicated; a whopping stack machine is needed to stuff the fields, at 178 * the very least one SOP_PUSH and one SOP_POP (of the correct format) are 179 * needed. 180 * 181 * Hence, define our own simpler relocation types. Numbers are chosen as to 182 * not collide with potential future additions to the true ELF relocation 183 * type enum. 184 */ 185 186/* Field Sk16, shifted right by 2; suitable for conditional jumps */ 187#define R_LOONGARCH_BR_SK16 256 188/* Field Sd10k16, shifted right by 2; suitable for B and BL */ 189#define R_LOONGARCH_BR_SD10K16 257 190 191static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 192{ 193 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 194 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 195 196 tcg_debug_assert((offset & 3) == 0); 197 offset >>= 2; 198 if (offset == sextreg(offset, 0, 16)) { 199 *src_rw = deposit64(*src_rw, 10, 16, offset); 200 return true; 201 } 202 203 return false; 204} 205 206static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, 207 const tcg_insn_unit *target) 208{ 209 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 210 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 211 212 tcg_debug_assert((offset & 3) == 0); 213 offset >>= 2; 214 if (offset == sextreg(offset, 0, 26)) { 215 *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ 216 *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ 217 return true; 218 } 219 220 return false; 221} 222 223static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 224 intptr_t value, intptr_t addend) 225{ 226 tcg_debug_assert(addend == 0); 227 switch (type) { 228 case R_LOONGARCH_BR_SK16: 229 return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); 230 case R_LOONGARCH_BR_SD10K16: 231 return reloc_br_sd10k16(code_ptr, 
(tcg_insn_unit *)value); 232 default: 233 g_assert_not_reached(); 234 } 235} 236 237#include "tcg-insn-defs.c.inc" 238 239/* 240 * TCG intrinsics 241 */ 242 243static void tcg_out_mb(TCGContext *s, TCGArg a0) 244{ 245 /* Baseline LoongArch only has the full barrier, unfortunately. */ 246 tcg_out_opc_dbar(s, 0); 247} 248 249static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 250{ 251 if (ret == arg) { 252 return true; 253 } 254 switch (type) { 255 case TCG_TYPE_I32: 256 case TCG_TYPE_I64: 257 /* 258 * Conventional register-register move used in LoongArch is 259 * `or dst, src, zero`. 260 */ 261 tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); 262 break; 263 default: 264 g_assert_not_reached(); 265 } 266 return true; 267} 268 269/* Loads a 32-bit immediate into rd, sign-extended. */ 270static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) 271{ 272 tcg_target_long lo = sextreg(val, 0, 12); 273 tcg_target_long hi12 = sextreg(val, 12, 20); 274 275 /* Single-instruction cases. */ 276 if (hi12 == 0) { 277 /* val fits in uimm12: ori rd, zero, val */ 278 tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); 279 return; 280 } 281 if (hi12 == sextreg(lo, 12, 20)) { 282 /* val fits in simm12: addi.w rd, zero, val */ 283 tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); 284 return; 285 } 286 287 /* High bits must be set; load with lu12i.w + optional ori. */ 288 tcg_out_opc_lu12i_w(s, rd, hi12); 289 if (lo != 0) { 290 tcg_out_opc_ori(s, rd, rd, lo & 0xfff); 291 } 292} 293 294static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 295 tcg_target_long val) 296{ 297 /* 298 * LoongArch conventionally loads 64-bit immediates in at most 4 steps, 299 * with dedicated instructions for filling the respective bitfields 300 * below: 301 * 302 * 6 5 4 3 303 * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 304 * +-----------------------+---------------------------------------+... 
305 * | hi52 | hi32 | 306 * +-----------------------+---------------------------------------+... 307 * 3 2 1 308 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 309 * ...+-------------------------------------+-------------------------+ 310 * | hi12 | lo | 311 * ...+-------------------------------------+-------------------------+ 312 * 313 * Check if val belong to one of the several fast cases, before falling 314 * back to the slow path. 315 */ 316 317 intptr_t pc_offset; 318 tcg_target_long val_lo, val_hi, pc_hi, offset_hi; 319 tcg_target_long hi12, hi32, hi52; 320 321 /* Value fits in signed i32. */ 322 if (type == TCG_TYPE_I32 || val == (int32_t)val) { 323 tcg_out_movi_i32(s, rd, val); 324 return; 325 } 326 327 /* PC-relative cases. */ 328 pc_offset = tcg_pcrel_diff(s, (void *)val); 329 if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { 330 /* Single pcaddu2i. */ 331 tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); 332 return; 333 } 334 335 if (pc_offset == (int32_t)pc_offset) { 336 /* Offset within 32 bits; load with pcalau12i + ori. */ 337 val_lo = sextreg(val, 0, 12); 338 val_hi = val >> 12; 339 pc_hi = (val - pc_offset) >> 12; 340 offset_hi = val_hi - pc_hi; 341 342 tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); 343 tcg_out_opc_pcalau12i(s, rd, offset_hi); 344 if (val_lo != 0) { 345 tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); 346 } 347 return; 348 } 349 350 hi12 = sextreg(val, 12, 20); 351 hi32 = sextreg(val, 32, 20); 352 hi52 = sextreg(val, 52, 12); 353 354 /* Single cu52i.d case. */ 355 if ((hi52 != 0) && (ctz64(val) >= 52)) { 356 tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); 357 return; 358 } 359 360 /* Slow path. Initialize the low 32 bits, then concat high bits. */ 361 tcg_out_movi_i32(s, rd, val); 362 363 /* Load hi32 and hi52 explicitly when they are unexpected values. 
*/ 364 if (hi32 != sextreg(hi12, 20, 20)) { 365 tcg_out_opc_cu32i_d(s, rd, hi32); 366 } 367 368 if (hi52 != sextreg(hi32, 20, 12)) { 369 tcg_out_opc_cu52i_d(s, rd, rd, hi52); 370 } 371} 372 373static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, 374 TCGReg rs, tcg_target_long imm) 375{ 376 tcg_target_long lo12 = sextreg(imm, 0, 12); 377 tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); 378 379 /* 380 * Note that there's a hole in between hi16 and lo12: 381 * 382 * 3 2 1 0 383 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 384 * ...+-------------------------------+-------+-----------------------+ 385 * | hi16 | | lo12 | 386 * ...+-------------------------------+-------+-----------------------+ 387 * 388 * For bits within that hole, it's more efficient to use LU12I and ADD. 389 */ 390 if (imm == (hi16 << 16) + lo12) { 391 if (hi16) { 392 tcg_out_opc_addu16i_d(s, rd, rs, hi16); 393 rs = rd; 394 } 395 if (type == TCG_TYPE_I32) { 396 tcg_out_opc_addi_w(s, rd, rs, lo12); 397 } else if (lo12) { 398 tcg_out_opc_addi_d(s, rd, rs, lo12); 399 } else { 400 tcg_out_mov(s, type, rd, rs); 401 } 402 } else { 403 tcg_out_movi(s, type, TCG_REG_TMP0, imm); 404 if (type == TCG_TYPE_I32) { 405 tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); 406 } else { 407 tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); 408 } 409 } 410} 411 412static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 413{ 414 return false; 415} 416 417static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 418 tcg_target_long imm) 419{ 420 /* This function is only used for passing structs by reference. 
*/ 421 g_assert_not_reached(); 422} 423 424static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) 425{ 426 tcg_out_opc_andi(s, ret, arg, 0xff); 427} 428 429static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) 430{ 431 tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); 432} 433 434static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) 435{ 436 tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); 437} 438 439static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 440{ 441 tcg_out_opc_sext_b(s, ret, arg); 442} 443 444static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 445{ 446 tcg_out_opc_sext_h(s, ret, arg); 447} 448 449static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) 450{ 451 tcg_out_opc_addi_w(s, ret, arg, 0); 452} 453 454static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 455{ 456 if (ret != arg) { 457 tcg_out_ext32s(s, ret, arg); 458 } 459} 460 461static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 462{ 463 tcg_out_ext32u(s, ret, arg); 464} 465 466static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) 467{ 468 tcg_out_ext32s(s, ret, arg); 469} 470 471static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, 472 TCGReg a0, TCGReg a1, TCGReg a2, 473 bool c2, bool is_32bit) 474{ 475 if (c2) { 476 /* 477 * Fast path: semantics already satisfied due to constraint and 478 * insn behavior, single instruction is enough. 479 */ 480 tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); 481 /* all clz/ctz insns belong to DJ-format */ 482 tcg_out32(s, encode_dj_insn(opc, a0, a1)); 483 return; 484 } 485 486 tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); 487 /* a0 = a1 ? 
REG_TMP0 : a2 */ 488 tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); 489 tcg_out_opc_masknez(s, a0, a2, a1); 490 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); 491} 492 493#define SETCOND_INV TCG_TARGET_NB_REGS 494#define SETCOND_NEZ (SETCOND_INV << 1) 495#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) 496 497static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, 498 TCGReg arg1, tcg_target_long arg2, bool c2) 499{ 500 int flags = 0; 501 502 switch (cond) { 503 case TCG_COND_EQ: /* -> NE */ 504 case TCG_COND_GE: /* -> LT */ 505 case TCG_COND_GEU: /* -> LTU */ 506 case TCG_COND_GT: /* -> LE */ 507 case TCG_COND_GTU: /* -> LEU */ 508 cond = tcg_invert_cond(cond); 509 flags ^= SETCOND_INV; 510 break; 511 default: 512 break; 513 } 514 515 switch (cond) { 516 case TCG_COND_LE: 517 case TCG_COND_LEU: 518 /* 519 * If we have a constant input, the most efficient way to implement 520 * LE is by adding 1 and using LT. Watch out for wrap around for LEU. 521 * We don't need to care for this for LE because the constant input 522 * is still constrained to int32_t, and INT32_MAX+1 is representable 523 * in the 64-bit temporary register. 
524 */ 525 if (c2) { 526 if (cond == TCG_COND_LEU) { 527 /* unsigned <= -1 is true */ 528 if (arg2 == -1) { 529 tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); 530 return ret; 531 } 532 cond = TCG_COND_LTU; 533 } else { 534 cond = TCG_COND_LT; 535 } 536 arg2 += 1; 537 } else { 538 TCGReg tmp = arg2; 539 arg2 = arg1; 540 arg1 = tmp; 541 cond = tcg_swap_cond(cond); /* LE -> GE */ 542 cond = tcg_invert_cond(cond); /* GE -> LT */ 543 flags ^= SETCOND_INV; 544 } 545 break; 546 default: 547 break; 548 } 549 550 switch (cond) { 551 case TCG_COND_NE: 552 flags |= SETCOND_NEZ; 553 if (!c2) { 554 tcg_out_opc_xor(s, ret, arg1, arg2); 555 } else if (arg2 == 0) { 556 ret = arg1; 557 } else if (arg2 >= 0 && arg2 <= 0xfff) { 558 tcg_out_opc_xori(s, ret, arg1, arg2); 559 } else { 560 tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); 561 } 562 break; 563 564 case TCG_COND_LT: 565 case TCG_COND_LTU: 566 if (c2) { 567 if (arg2 >= -0x800 && arg2 <= 0x7ff) { 568 if (cond == TCG_COND_LT) { 569 tcg_out_opc_slti(s, ret, arg1, arg2); 570 } else { 571 tcg_out_opc_sltui(s, ret, arg1, arg2); 572 } 573 break; 574 } 575 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); 576 arg2 = TCG_REG_TMP0; 577 } 578 if (cond == TCG_COND_LT) { 579 tcg_out_opc_slt(s, ret, arg1, arg2); 580 } else { 581 tcg_out_opc_sltu(s, ret, arg1, arg2); 582 } 583 break; 584 585 default: 586 g_assert_not_reached(); 587 break; 588 } 589 590 return ret | flags; 591} 592 593static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, 594 TCGReg arg1, tcg_target_long arg2, bool c2) 595{ 596 int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); 597 598 if (tmpflags != ret) { 599 TCGReg tmp = tmpflags & ~SETCOND_FLAGS; 600 601 switch (tmpflags & SETCOND_FLAGS) { 602 case SETCOND_INV: 603 /* Intermediate result is boolean: simply invert. */ 604 tcg_out_opc_xori(s, ret, tmp, 1); 605 break; 606 case SETCOND_NEZ: 607 /* Intermediate result is zero/non-zero: test != 0. 
*/ 608 tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); 609 break; 610 case SETCOND_NEZ | SETCOND_INV: 611 /* Intermediate result is zero/non-zero: test == 0. */ 612 tcg_out_opc_sltui(s, ret, tmp, 1); 613 break; 614 default: 615 g_assert_not_reached(); 616 } 617 } 618} 619 620static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, 621 TCGReg c1, tcg_target_long c2, bool const2, 622 TCGReg v1, TCGReg v2) 623{ 624 int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); 625 TCGReg t; 626 627 /* Standardize the test below to t != 0. */ 628 if (tmpflags & SETCOND_INV) { 629 t = v1, v1 = v2, v2 = t; 630 } 631 632 t = tmpflags & ~SETCOND_FLAGS; 633 if (v1 == TCG_REG_ZERO) { 634 tcg_out_opc_masknez(s, ret, v2, t); 635 } else if (v2 == TCG_REG_ZERO) { 636 tcg_out_opc_maskeqz(s, ret, v1, t); 637 } else { 638 tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ 639 tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */ 640 tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); 641 } 642} 643 644/* 645 * Branch helpers 646 */ 647 648static const struct { 649 LoongArchInsn op; 650 bool swap; 651} tcg_brcond_to_loongarch[] = { 652 [TCG_COND_EQ] = { OPC_BEQ, false }, 653 [TCG_COND_NE] = { OPC_BNE, false }, 654 [TCG_COND_LT] = { OPC_BGT, true }, 655 [TCG_COND_GE] = { OPC_BLE, true }, 656 [TCG_COND_LE] = { OPC_BLE, false }, 657 [TCG_COND_GT] = { OPC_BGT, false }, 658 [TCG_COND_LTU] = { OPC_BGTU, true }, 659 [TCG_COND_GEU] = { OPC_BLEU, true }, 660 [TCG_COND_LEU] = { OPC_BLEU, false }, 661 [TCG_COND_GTU] = { OPC_BGTU, false } 662}; 663 664static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, 665 TCGReg arg2, TCGLabel *l) 666{ 667 LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; 668 669 tcg_debug_assert(op != 0); 670 671 if (tcg_brcond_to_loongarch[cond].swap) { 672 TCGReg t = arg1; 673 arg1 = arg2; 674 arg2 = t; 675 } 676 677 /* all conditional branch insns belong to DJSk16-format */ 678 tcg_out_reloc(s, s->code_ptr, 
R_LOONGARCH_BR_SK16, l, 0); 679 tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); 680} 681 682static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) 683{ 684 TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; 685 ptrdiff_t offset = tcg_pcrel_diff(s, arg); 686 687 tcg_debug_assert((offset & 3) == 0); 688 if (offset == sextreg(offset, 0, 28)) { 689 /* short jump: +/- 256MiB */ 690 if (tail) { 691 tcg_out_opc_b(s, offset >> 2); 692 } else { 693 tcg_out_opc_bl(s, offset >> 2); 694 } 695 } else if (offset == sextreg(offset, 0, 38)) { 696 /* long jump: +/- 256GiB */ 697 tcg_target_long lo = sextreg(offset, 0, 18); 698 tcg_target_long hi = offset - lo; 699 tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); 700 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 701 } else { 702 /* far jump: 64-bit */ 703 tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); 704 tcg_target_long hi = (tcg_target_long)arg - lo; 705 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); 706 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 707 } 708} 709 710static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, 711 const TCGHelperInfo *info) 712{ 713 tcg_out_call_int(s, arg, false); 714} 715 716/* 717 * Load/store helpers 718 */ 719 720static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, 721 TCGReg addr, intptr_t offset) 722{ 723 intptr_t imm12 = sextreg(offset, 0, 12); 724 725 if (offset != imm12) { 726 intptr_t diff = tcg_pcrel_diff(s, (void *)offset); 727 728 if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { 729 imm12 = sextreg(diff, 0, 12); 730 tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); 731 } else { 732 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); 733 if (addr != TCG_REG_ZERO) { 734 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); 735 } 736 } 737 addr = TCG_REG_TMP2; 738 } 739 740 switch (opc) { 741 case OPC_LD_B: 742 case OPC_LD_BU: 743 case OPC_LD_H: 744 case OPC_LD_HU: 745 case OPC_LD_W: 
746 case OPC_LD_WU: 747 case OPC_LD_D: 748 case OPC_ST_B: 749 case OPC_ST_H: 750 case OPC_ST_W: 751 case OPC_ST_D: 752 tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); 753 break; 754 default: 755 g_assert_not_reached(); 756 } 757} 758 759static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, 760 TCGReg arg1, intptr_t arg2) 761{ 762 bool is_32bit = type == TCG_TYPE_I32; 763 tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); 764} 765 766static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 767 TCGReg arg1, intptr_t arg2) 768{ 769 bool is_32bit = type == TCG_TYPE_I32; 770 tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2); 771} 772 773static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 774 TCGReg base, intptr_t ofs) 775{ 776 if (val == 0) { 777 tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); 778 return true; 779 } 780 return false; 781} 782 783/* 784 * Load/store helpers for SoftMMU, and qemu_ld/st implementations 785 */ 786 787static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 788{ 789 tcg_out_opc_b(s, 0); 790 return reloc_br_sd10k16(s->code_ptr - 1, target); 791} 792 793static const TCGLdstHelperParam ldst_helper_param = { 794 .ntmp = 1, .tmp = { TCG_REG_TMP0 } 795}; 796 797static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 798{ 799 MemOp opc = get_memop(l->oi); 800 801 /* resolve label address */ 802 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 803 return false; 804 } 805 806 tcg_out_ld_helper_args(s, l, &ldst_helper_param); 807 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false); 808 tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); 809 return tcg_out_goto(s, l->raddr); 810} 811 812static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 813{ 814 MemOp opc = get_memop(l->oi); 815 816 /* resolve label address */ 817 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 818 return false; 
819 } 820 821 tcg_out_st_helper_args(s, l, &ldst_helper_param); 822 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); 823 return tcg_out_goto(s, l->raddr); 824} 825 826typedef struct { 827 TCGReg base; 828 TCGReg index; 829} HostAddress; 830 831/* 832 * For softmmu, perform the TLB load and compare. 833 * For useronly, perform any required alignment tests. 834 * In both cases, return a TCGLabelQemuLdst structure if the slow path 835 * is required and fill in @h with the host address for the fast path. 836 */ 837static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 838 TCGReg addr_reg, MemOpIdx oi, 839 bool is_ld) 840{ 841 TCGLabelQemuLdst *ldst = NULL; 842 MemOp opc = get_memop(oi); 843 unsigned a_bits = get_alignment_bits(opc); 844 845#ifdef CONFIG_SOFTMMU 846 unsigned s_bits = opc & MO_SIZE; 847 int mem_index = get_mmuidx(oi); 848 int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); 849 int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); 850 int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); 851 852 ldst = new_ldst_label(s); 853 ldst->is_ld = is_ld; 854 ldst->oi = oi; 855 ldst->addrlo_reg = addr_reg; 856 857 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 858 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); 859 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); 860 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); 861 862 tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, 863 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 864 tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); 865 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); 866 867 /* Load the tlb comparator and the addend. */ 868 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, 869 is_ld ? 
offsetof(CPUTLBEntry, addr_read) 870 : offsetof(CPUTLBEntry, addr_write)); 871 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, 872 offsetof(CPUTLBEntry, addend)); 873 874 /* 875 * For aligned accesses, we check the first byte and include the alignment 876 * bits within the address. For unaligned access, we check that we don't 877 * cross pages using the address of the last byte of the access. 878 */ 879 if (a_bits < s_bits) { 880 unsigned a_mask = (1u << a_bits) - 1; 881 unsigned s_mask = (1u << s_bits) - 1; 882 tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask); 883 } else { 884 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg); 885 } 886 tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, 887 a_bits, TARGET_PAGE_BITS - 1); 888 889 /* Compare masked address with the TLB entry. */ 890 ldst->label_ptr[0] = s->code_ptr; 891 tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); 892 893 h->index = TCG_REG_TMP2; 894#else 895 if (a_bits) { 896 ldst = new_ldst_label(s); 897 898 ldst->is_ld = is_ld; 899 ldst->oi = oi; 900 ldst->addrlo_reg = addr_reg; 901 902 /* 903 * Without micro-architecture details, we don't know which of 904 * bstrpick or andi is faster, so use bstrpick as it's not 905 * constrained by imm field width. Not to say alignments >= 2^12 906 * are going to happen any time soon. 907 */ 908 tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); 909 910 ldst->label_ptr[0] = s->code_ptr; 911 tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); 912 } 913 914 h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 915#endif 916 917 if (TARGET_LONG_BITS == 32) { 918 h->base = TCG_REG_TMP0; 919 tcg_out_ext32u(s, h->base, addr_reg); 920 } else { 921 h->base = addr_reg; 922 } 923 924 return ldst; 925} 926 927static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, 928 TCGReg rd, HostAddress h) 929{ 930 /* Byte swapping is left to middle-end expansion. 
*/ 931 tcg_debug_assert((opc & MO_BSWAP) == 0); 932 933 switch (opc & MO_SSIZE) { 934 case MO_UB: 935 tcg_out_opc_ldx_bu(s, rd, h.base, h.index); 936 break; 937 case MO_SB: 938 tcg_out_opc_ldx_b(s, rd, h.base, h.index); 939 break; 940 case MO_UW: 941 tcg_out_opc_ldx_hu(s, rd, h.base, h.index); 942 break; 943 case MO_SW: 944 tcg_out_opc_ldx_h(s, rd, h.base, h.index); 945 break; 946 case MO_UL: 947 if (type == TCG_TYPE_I64) { 948 tcg_out_opc_ldx_wu(s, rd, h.base, h.index); 949 break; 950 } 951 /* fallthrough */ 952 case MO_SL: 953 tcg_out_opc_ldx_w(s, rd, h.base, h.index); 954 break; 955 case MO_UQ: 956 tcg_out_opc_ldx_d(s, rd, h.base, h.index); 957 break; 958 default: 959 g_assert_not_reached(); 960 } 961} 962 963static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 964 MemOpIdx oi, TCGType data_type) 965{ 966 TCGLabelQemuLdst *ldst; 967 HostAddress h; 968 969 ldst = prepare_host_addr(s, &h, addr_reg, oi, true); 970 tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); 971 972 if (ldst) { 973 ldst->type = data_type; 974 ldst->datalo_reg = data_reg; 975 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 976 } 977} 978 979static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, 980 TCGReg rd, HostAddress h) 981{ 982 /* Byte swapping is left to middle-end expansion. 
*/ 983 tcg_debug_assert((opc & MO_BSWAP) == 0); 984 985 switch (opc & MO_SIZE) { 986 case MO_8: 987 tcg_out_opc_stx_b(s, rd, h.base, h.index); 988 break; 989 case MO_16: 990 tcg_out_opc_stx_h(s, rd, h.base, h.index); 991 break; 992 case MO_32: 993 tcg_out_opc_stx_w(s, rd, h.base, h.index); 994 break; 995 case MO_64: 996 tcg_out_opc_stx_d(s, rd, h.base, h.index); 997 break; 998 default: 999 g_assert_not_reached(); 1000 } 1001} 1002 1003static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1004 MemOpIdx oi, TCGType data_type) 1005{ 1006 TCGLabelQemuLdst *ldst; 1007 HostAddress h; 1008 1009 ldst = prepare_host_addr(s, &h, addr_reg, oi, false); 1010 tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); 1011 1012 if (ldst) { 1013 ldst->type = data_type; 1014 ldst->datalo_reg = data_reg; 1015 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1016 } 1017} 1018 1019/* 1020 * Entry-points 1021 */ 1022 1023static const tcg_insn_unit *tb_ret_addr; 1024 1025static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1026{ 1027 /* Reuse the zeroing that exists for goto_ptr. */ 1028 if (a0 == 0) { 1029 tcg_out_call_int(s, tcg_code_gen_epilogue, true); 1030 } else { 1031 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); 1032 tcg_out_call_int(s, tb_ret_addr, true); 1033 } 1034} 1035 1036static void tcg_out_goto_tb(TCGContext *s, int which) 1037{ 1038 /* 1039 * Direct branch, or load indirect address, to be patched 1040 * by tb_target_set_jmp_target. Check indirect load offset 1041 * in range early, regardless of direct branch distance, 1042 * via assert within tcg_out_opc_pcaddu2i. 1043 */ 1044 uintptr_t i_addr = get_jmp_target_addr(s, which); 1045 intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr); 1046 1047 set_jmp_insn_offset(s, which); 1048 tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2); 1049 1050 /* Finish the load and indirect branch. 
*/
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
    set_jmp_reset_offset(s, which);
}

/*
 * Rewrite the single patchable instruction of jump slot @n of @tb.
 *
 * The destination is read from tb->jmp_target_addr[n].  Displacements are
 * computed relative to @jmp_rx, while the new instruction word is stored
 * through @jmp_rw; both addresses are then handed to flush_idcache_range()
 * for a 4-byte range.
 *
 * Two encodings are possible:
 *   - if the displacement, counted in units of 4 bytes, fits a signed
 *     26-bit field, a direct B to the destination is written;
 *   - otherwise a PCADDU2I is written that makes TCG_REG_TMP0 hold the
 *     address of the tb->jmp_target_addr[n] slot itself, for an indirect
 *     branch (presumably completed by a load + jirl emitted right after
 *     the patched instruction -- confirm against tcg_out_goto_tb).
 *
 * NOTE(review): the PCADDU2I displacement is not range-checked here;
 * presumably it was validated when the slot was first emitted -- confirm.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    /* Displacement to the destination, in 4-byte units. */
    ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2;
    tcg_insn_unit insn;

    /* Either directly branch, or load slot address for indirect branch. */
    if (d_disp == sextreg(d_disp, 0, 26)) {
        insn = encode_sd10k16_insn(OPC_B, d_disp);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        intptr_t i_disp = i_addr - jmp_rx;
        insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2);
    }

    /* Publish the new instruction with one store, then flush that word. */
    qatomic_set((tcg_insn_unit *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

/*
 * Emit host code for one TCG opcode.
 *
 * @s:          code generation context
 * @opc:        opcode to emit
 * @args:       operands of the opcode; args[0..2] are cached as a0/a1/a2
 * @const_args: per-operand "is a constant" flags; only const_args[2] is
 *              consulted here (cached as c2)
 *
 * Each case maps the opcode onto one or two tcg_out_* emitters, choosing
 * the register-immediate form when c2 is set.  The opcodes listed just
 * before "default" are documented as being emitted through other paths;
 * reaching them, or any opcode without a case, hits
 * g_assert_not_reached().
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_goto_ptr:
        /* Indirect jump to the address in a0; link register is ZERO. */
        tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        /* Record a relocation against the label, then emit B with offset 0. */
        tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0),
                      0);
        tcg_out_opc_b(s, 0);
        break;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        /* Operand order: condition (a2), the two comparands, label. */
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    case INDEX_op_extrh_i64_i32:
        /* High half: arithmetic shift right by 32. */
        tcg_out_opc_srai_d(s, a0, a1, 32);
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        /* NOT is expressed as NOR with the zero register. */
        tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
        break;

    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        if (c2) {
            /* Constant form: OR with the immediate, then invert the result. */
            tcg_out_opc_ori(s, a0, a1, a2);
            tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO);
        } else {
            tcg_out_opc_nor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_andi(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_andn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_ori(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_orn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_andi(s, a0, a1, a2);
        } else {
            tcg_out_opc_and(s, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
        } else {
            tcg_out_opc_or(s, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_xori(s, a0, a1, a2);
        } else {
            tcg_out_opc_xor(s, a0, a1, a2);
        }
        break;

    /*
     * extract/deposit: the last two emitter arguments are a bit position
     * and that position plus a length minus one, i.e. presumably the
     * first and last bit of the field -- confirm against the emitters.
     */
    case INDEX_op_extract_i32:
        tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
        break;
    case INDEX_op_extract_i64:
        tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
        break;

    /* a1 is not passed: the constraint ties it to the output register a0. */
    case INDEX_op_deposit_i32:
        tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;
    case INDEX_op_deposit_i64:
        tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        tcg_out_opc_revb_2h(s, a0, a1);
        /*
         * a2 carries the TCG_BSWAP_* flags: sign-extend when OS is set,
         * zero-extend when OZ is requested without IZ, otherwise leave
         * the swapped value as is.
         */
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap32_i32:
        /* All 32-bit values are computed sign-extended in the register. */
        a2 = TCG_BSWAP_OS;
        /* fallthrough */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_revb_2w(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_opc_revb_d(s, a0, a1);
        break;

    /* clz/ctz share one helper; the final flag is true for 32-bit ops. */
    case INDEX_op_clz_i32:
        tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_clz_i64:
        tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_ctz_i32:
        tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
        break;

    /*
     * Shifts and rotates: constant counts are masked to the operation
     * width (0x1f for 32-bit, 0x3f for 64-bit) before being encoded.
     */
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sll_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sll_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_srl_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_srl_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sra_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sra_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i32:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
        } else {
            /* TMP0 = 0 - count, then rotate right by TMP0. */
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
        }
        break;
    case INDEX_op_rotl_i64:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
        } else {
            /*
             * NOTE(review): the negation uses the 32-bit subtract even
             * for the 64-bit rotate; presumably fine because the rotate
             * only consumes the low bits of the count -- confirm.
             */
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_rotr_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_rotr_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
        } else {
            tcg_out_opc_add_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
        } else {
            tcg_out_opc_add_d(s, a0, a1, a2);
        }
        break;

    /* Subtracting a constant is emitted as adding its negation. */
    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_mul_w(s, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_mul_d(s, a0, a1, a2);
        break;

    case INDEX_op_mulsh_i32:
        tcg_out_opc_mulh_w(s, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_opc_mulh_d(s, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
        tcg_out_opc_mulh_wu(s, a0, a1, a2);
        break;
    case INDEX_op_muluh_i64:
        tcg_out_opc_mulh_du(s, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_div_w(s, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_div_d(s, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_div_wu(s, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_div_du(s, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_mod_w(s, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_mod_d(s, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_mod_wu(s, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_mod_du(s, a0, a1, a2);
        break;

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        /* args[3] is the condition. */
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        /* args[5] is the condition; args[3] and args[4] are the two values. */
        tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]);
        break;

    /* Host loads and stores: a0 is the data register, a1 + a2 the address. */
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LD_H, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LD_W, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD_D, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_ST_B, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_ST_H, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_ST_W, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
        break;

    /* Guest memory accesses; the last argument selects the data type. */
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}

/*
 * Return the operand constraint set for @op.
 *
 * The C_O<n>_I<m>() names presumably encode the number of outputs and
 * inputs, followed by one constraint string per operand; the constraint
 * letters themselves (r, Z, i, J, U, C, W, 0) are defined outside this
 * part of the file -- consult those definitions for their exact meaning.
 *
 * Opcodes without a case here hit g_assert_not_reached().
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st_i64:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(rZ, rZ);

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld_i64:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, r);

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        /*
         * LoongArch insns for these ops don't have reg-imm forms, but we
         * can express using andi/ori if ~constant satisfies
         * TCG_CT_CONST_U12.
         */
        return C_O1_I2(r, r, rC);

    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_add_i32:
        return C_O1_I2(r, r, ri);
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rJ);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        /* LoongArch reg-imm bitops have their imms ZERO-extended */
        return C_O1_I2(r, r, rU);

    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rW);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        /* Must deposit into the same register as input */
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_sub_i32:
    case INDEX_op_setcond_i32:
        return C_O1_I2(r, rZ, ri);
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, rZ, rJ);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_mulsh_i32:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i32:
    case INDEX_op_muluh_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, rZ, rJ, rZ, rZ);

    default:
        g_assert_not_reached();
    }
}

/*
 * Registers stored by the prologue and reloaded by the epilogue.
 * Entry i lives at stack offset SAVE_OFS + i * REG_SIZE, so the order of
 * this array fixes the layout of the save area.
 */
static const int tcg_target_callee_save_regs[] = {
    TCG_REG_S0,     /* used for the global env (TCG_AREG0) */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_RA,     /* should be last for ABI compliance */
};

/*
 * Stack frame parameters.
 *
 * From the stack pointer upwards the frame holds the static call
 * argument area, the temporary buffer (TEMP_SIZE bytes) and then the
 * callee-saved register area (SAVE_SIZE bytes, starting at SAVE_OFS).
 * FRAME_SIZE is the sum of the three, rounded up to a multiple of
 * TCG_TARGET_STACK_ALIGN (the mask form assumes that value is a power
 * of two).
 */
#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
                     + TCG_TARGET_STACK_ALIGN - 1) \
                    & -TCG_TARGET_STACK_ALIGN)
#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)

/* We're expecting to be able to use an immediate for frame allocation.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);

/*
 * Generate global QEMU prologue and epilogue code.
 *
 * Emitted sequence, in order:
 *   - allocate FRAME_SIZE bytes of stack and store every register listed
 *     in tcg_target_callee_save_regs into the save area;
 *   - in builds without CONFIG_SOFTMMU, when USE_GUEST_BASE is set, load
 *     guest_base into TCG_GUEST_BASE_REG and mark that register reserved;
 *   - copy the first call argument register into TCG_AREG0 and jump to
 *     the address held in the second call argument register;
 *   - the goto_ptr return path (recorded in tcg_code_gen_epilogue), which
 *     zeroes A0 and falls through into
 *   - the common epilogue (recorded in tb_ret_addr): reload the saved
 *     registers, release the frame and return through RA.
 *
 * Before emitting anything, the temporary buffer is registered with
 * tcg_set_frame() at SP + TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE bytes long.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);

    /* TB prologue */
    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* Call generated code */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);

    /* Return path for goto_ptr. Set return value to 0 */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}

/*
 * One-time backend initialisation.
 *
 * Terminates the process with EXIT_FAILURE when AT_HWCAP does not
 * advertise HWCAP_LOONGARCH_UAL.  Otherwise it:
 *   - makes ALL_GENERAL_REGS available for both I32 and I64 values;
 *   - marks every general register except S0..S9 as clobbered by calls;
 *   - reserves ZERO, TMP0..TMP2, SP, TP and TCG_REG_RESERVED in @s.
 */
static void tcg_target_init(TCGContext *s)
{
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);

    /* Server and desktop class cpus have UAL; embedded cpus do not. */
    if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
        error_report("TCG: unaligned access support required; exiting");
        exit(EXIT_FAILURE);
    }

    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

    /* Start from "everything is clobbered", then exempt S0..S9. */
    tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);

    /* Registers the allocator must never hand out. */
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
}

/*
 * Debug frame description handed to tcg_register_jit_int(): the common
 * header, a 4-byte CFA definition, and two bytes per callee-saved
 * register.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
} DebugFrame;

#define ELF_HOST_MACHINE EM_LOONGARCH

/*
 * Unwind information for the frame built by tcg_target_qemu_prologue().
 *
 * The CFA is defined as SP + FRAME_SIZE, with FRAME_SIZE encoded as a
 * two-byte uleb128; the build-time check FRAME_SIZE <= 0x7ff guarantees
 * that two bytes are enough.
 *
 * Each fde_reg_ofs pair is (0x80 + register number, factored offset);
 * per the inline comments a factor of N denotes CFA - N * 8.
 *
 * NOTE(review): the offsets place ra at CFA-8 and s0 at CFA-88, i.e. they
 * assume the save area ends exactly at the top of the frame.  The
 * prologue stores entry i at SP + SAVE_OFS + i * REG_SIZE, so this only
 * holds when FRAME_SIZE == SAVE_OFS + SAVE_SIZE (no alignment padding
 * added by the rounding in FRAME_SIZE) -- confirm against the actual
 * constants.  The pairs must also stay in sync with the order of
 * tcg_target_callee_save_regs.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
    .h.cie.return_column = TCG_REG_RA,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 23, 11,                  /* DW_CFA_offset, s0, -88 */
        0x80 + 24, 10,                  /* DW_CFA_offset, s1, -80 */
        0x80 + 25, 9,                   /* DW_CFA_offset, s2, -72 */
        0x80 + 26, 8,                   /* DW_CFA_offset, s3, -64 */
        0x80 + 27, 7,                   /* DW_CFA_offset, s4, -56 */
        0x80 + 28, 6,                   /* DW_CFA_offset, s5, -48 */
        0x80 + 29, 5,                   /* DW_CFA_offset, s6, -40 */
        0x80 + 30, 4,                   /* DW_CFA_offset, s7, -32 */
        0x80 + 31, 3,                   /* DW_CFA_offset, s8, -24 */
        0x80 + 22, 2,                   /* DW_CFA_offset, s9, -16 */
        0x80 + 1 , 1,                   /* DW_CFA_offset, ra, -8 */
    }
};

/*
 * Register the code buffer @buf of @buf_size bytes together with the
 * static debug_frame by forwarding to tcg_register_jit_int().
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}