1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name> 5 * 6 * Based on tcg/riscv/tcg-target.c.inc 7 * 8 * Copyright (c) 2018 SiFive, Inc 9 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> 10 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> 11 * Copyright (c) 2008 Fabrice Bellard 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this software and associated documentation files (the "Software"), to deal 15 * in the Software without restriction, including without limitation the rights 16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 * copies of the Software, and to permit persons to whom the Software is 18 * furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 * THE SOFTWARE. 30 */ 31 32#include "../tcg-ldst.c.inc" 33#include <asm/hwcap.h> 34 35#ifdef CONFIG_DEBUG_TCG 36static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 37 "zero", 38 "ra", 39 "tp", 40 "sp", 41 "a0", 42 "a1", 43 "a2", 44 "a3", 45 "a4", 46 "a5", 47 "a6", 48 "a7", 49 "t0", 50 "t1", 51 "t2", 52 "t3", 53 "t4", 54 "t5", 55 "t6", 56 "t7", 57 "t8", 58 "r21", /* reserved in the LP64* ABI, hence no ABI name */ 59 "s9", 60 "s0", 61 "s1", 62 "s2", 63 "s3", 64 "s4", 65 "s5", 66 "s6", 67 "s7", 68 "s8" 69}; 70#endif 71 72static const int tcg_target_reg_alloc_order[] = { 73 /* Registers preserved across calls */ 74 /* TCG_REG_S0 reserved for TCG_AREG0 */ 75 TCG_REG_S1, 76 TCG_REG_S2, 77 TCG_REG_S3, 78 TCG_REG_S4, 79 TCG_REG_S5, 80 TCG_REG_S6, 81 TCG_REG_S7, 82 TCG_REG_S8, 83 TCG_REG_S9, 84 85 /* Registers (potentially) clobbered across calls */ 86 TCG_REG_T0, 87 TCG_REG_T1, 88 TCG_REG_T2, 89 TCG_REG_T3, 90 TCG_REG_T4, 91 TCG_REG_T5, 92 TCG_REG_T6, 93 TCG_REG_T7, 94 TCG_REG_T8, 95 96 /* Argument registers, opposite order of allocation. */ 97 TCG_REG_A7, 98 TCG_REG_A6, 99 TCG_REG_A5, 100 TCG_REG_A4, 101 TCG_REG_A3, 102 TCG_REG_A2, 103 TCG_REG_A1, 104 TCG_REG_A0, 105}; 106 107static const int tcg_target_call_iarg_regs[] = { 108 TCG_REG_A0, 109 TCG_REG_A1, 110 TCG_REG_A2, 111 TCG_REG_A3, 112 TCG_REG_A4, 113 TCG_REG_A5, 114 TCG_REG_A6, 115 TCG_REG_A7, 116}; 117 118static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 119{ 120 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 121 tcg_debug_assert(slot >= 0 && slot <= 1); 122 return TCG_REG_A0 + slot; 123} 124 125#ifndef CONFIG_SOFTMMU 126#define USE_GUEST_BASE (guest_base != 0) 127#define TCG_GUEST_BASE_REG TCG_REG_S1 128#endif 129 130#define TCG_CT_CONST_ZERO 0x100 131#define TCG_CT_CONST_S12 0x200 132#define TCG_CT_CONST_S32 0x400 133#define TCG_CT_CONST_U12 0x800 134#define TCG_CT_CONST_C12 0x1000 135#define TCG_CT_CONST_WSZ 0x2000 136 137#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) 138 139static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) 140{ 141 return sextract64(val, pos, len); 142} 143 144/* test if a constant matches the constraint */ 145static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 146{ 147 if (ct & TCG_CT_CONST) { 148 return true; 149 } 150 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 151 return true; 152 } 153 if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { 154 return true; 155 } 156 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { 157 return true; 158 } 159 if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { 160 return true; 161 } 162 if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { 163 return true; 164 } 165 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { 166 return true; 167 } 168 return false; 169} 170 171/* 172 * Relocations 173 */ 174 175/* 176 * Relocation records defined in LoongArch ELF psABI v1.00 is way too 177 * complicated; a whopping stack machine is needed to stuff the fields, at 178 * the very least one SOP_PUSH and one SOP_POP (of the correct format) are 179 * needed. 180 * 181 * Hence, define our own simpler relocation types. Numbers are chosen as to 182 * not collide with potential future additions to the true ELF relocation 183 * type enum. 184 */ 185 186/* Field Sk16, shifted right by 2; suitable for conditional jumps */ 187#define R_LOONGARCH_BR_SK16 256 188/* Field Sd10k16, shifted right by 2; suitable for B and BL */ 189#define R_LOONGARCH_BR_SD10K16 257 190 191static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 192{ 193 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 194 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 195 196 tcg_debug_assert((offset & 3) == 0); 197 offset >>= 2; 198 if (offset == sextreg(offset, 0, 16)) { 199 *src_rw = deposit64(*src_rw, 10, 16, offset); 200 return true; 201 } 202 203 return false; 204} 205 206static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, 207 const tcg_insn_unit *target) 208{ 209 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 210 intptr_t offset = (intptr_t)target - (intptr_t)src_rx; 211 212 tcg_debug_assert((offset & 3) == 0); 213 offset >>= 2; 214 if (offset == sextreg(offset, 0, 26)) { 215 *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ 216 *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ 217 return true; 218 } 219 220 return false; 221} 222 223static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 224 intptr_t value, intptr_t addend) 225{ 226 tcg_debug_assert(addend == 0); 227 switch (type) { 228 case R_LOONGARCH_BR_SK16: 229 return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); 230 case R_LOONGARCH_BR_SD10K16: 231 return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value); 232 default: 233 g_assert_not_reached(); 234 } 235} 236 237#include "tcg-insn-defs.c.inc" 238 239/* 240 * TCG intrinsics 241 */ 242 243static void tcg_out_mb(TCGContext *s, TCGArg a0) 244{ 245 /* Baseline LoongArch only has the full barrier, unfortunately. */ 246 tcg_out_opc_dbar(s, 0); 247} 248 249static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 250{ 251 if (ret == arg) { 252 return true; 253 } 254 switch (type) { 255 case TCG_TYPE_I32: 256 case TCG_TYPE_I64: 257 /* 258 * Conventional register-register move used in LoongArch is 259 * `or dst, src, zero`. 260 */ 261 tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); 262 break; 263 default: 264 g_assert_not_reached(); 265 } 266 return true; 267} 268 269/* Loads a 32-bit immediate into rd, sign-extended. */ 270static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) 271{ 272 tcg_target_long lo = sextreg(val, 0, 12); 273 tcg_target_long hi12 = sextreg(val, 12, 20); 274 275 /* Single-instruction cases. */ 276 if (hi12 == 0) { 277 /* val fits in uimm12: ori rd, zero, val */ 278 tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); 279 return; 280 } 281 if (hi12 == sextreg(lo, 12, 20)) { 282 /* val fits in simm12: addi.w rd, zero, val */ 283 tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); 284 return; 285 } 286 287 /* High bits must be set; load with lu12i.w + optional ori. */ 288 tcg_out_opc_lu12i_w(s, rd, hi12); 289 if (lo != 0) { 290 tcg_out_opc_ori(s, rd, rd, lo & 0xfff); 291 } 292} 293 294static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 295 tcg_target_long val) 296{ 297 /* 298 * LoongArch conventionally loads 64-bit immediates in at most 4 steps, 299 * with dedicated instructions for filling the respective bitfields 300 * below: 301 * 302 * 6 5 4 3 303 * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 304 * +-----------------------+---------------------------------------+... 305 * | hi52 | hi32 | 306 * +-----------------------+---------------------------------------+... 307 * 3 2 1 308 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 309 * ...+-------------------------------------+-------------------------+ 310 * | hi12 | lo | 311 * ...+-------------------------------------+-------------------------+ 312 * 313 * Check if val belong to one of the several fast cases, before falling 314 * back to the slow path. 315 */ 316 317 intptr_t pc_offset; 318 tcg_target_long val_lo, val_hi, pc_hi, offset_hi; 319 tcg_target_long hi12, hi32, hi52; 320 321 /* Value fits in signed i32. */ 322 if (type == TCG_TYPE_I32 || val == (int32_t)val) { 323 tcg_out_movi_i32(s, rd, val); 324 return; 325 } 326 327 /* PC-relative cases. */ 328 pc_offset = tcg_pcrel_diff(s, (void *)val); 329 if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { 330 /* Single pcaddu2i. */ 331 tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); 332 return; 333 } 334 335 if (pc_offset == (int32_t)pc_offset) { 336 /* Offset within 32 bits; load with pcalau12i + ori. */ 337 val_lo = sextreg(val, 0, 12); 338 val_hi = val >> 12; 339 pc_hi = (val - pc_offset) >> 12; 340 offset_hi = val_hi - pc_hi; 341 342 tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); 343 tcg_out_opc_pcalau12i(s, rd, offset_hi); 344 if (val_lo != 0) { 345 tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); 346 } 347 return; 348 } 349 350 hi12 = sextreg(val, 12, 20); 351 hi32 = sextreg(val, 32, 20); 352 hi52 = sextreg(val, 52, 12); 353 354 /* Single cu52i.d case. */ 355 if ((hi52 != 0) && (ctz64(val) >= 52)) { 356 tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); 357 return; 358 } 359 360 /* Slow path. Initialize the low 32 bits, then concat high bits. */ 361 tcg_out_movi_i32(s, rd, val); 362 363 /* Load hi32 and hi52 explicitly when they are unexpected values. */ 364 if (hi32 != sextreg(hi12, 20, 20)) { 365 tcg_out_opc_cu32i_d(s, rd, hi32); 366 } 367 368 if (hi52 != sextreg(hi32, 20, 12)) { 369 tcg_out_opc_cu52i_d(s, rd, rd, hi52); 370 } 371} 372 373static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, 374 TCGReg rs, tcg_target_long imm) 375{ 376 tcg_target_long lo12 = sextreg(imm, 0, 12); 377 tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); 378 379 /* 380 * Note that there's a hole in between hi16 and lo12: 381 * 382 * 3 2 1 0 383 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 384 * ...+-------------------------------+-------+-----------------------+ 385 * | hi16 | | lo12 | 386 * ...+-------------------------------+-------+-----------------------+ 387 * 388 * For bits within that hole, it's more efficient to use LU12I and ADD. 389 */ 390 if (imm == (hi16 << 16) + lo12) { 391 if (hi16) { 392 tcg_out_opc_addu16i_d(s, rd, rs, hi16); 393 rs = rd; 394 } 395 if (type == TCG_TYPE_I32) { 396 tcg_out_opc_addi_w(s, rd, rs, lo12); 397 } else if (lo12) { 398 tcg_out_opc_addi_d(s, rd, rs, lo12); 399 } else { 400 tcg_out_mov(s, type, rd, rs); 401 } 402 } else { 403 tcg_out_movi(s, type, TCG_REG_TMP0, imm); 404 if (type == TCG_TYPE_I32) { 405 tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); 406 } else { 407 tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); 408 } 409 } 410} 411 412static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 413{ 414 return false; 415} 416 417static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 418 tcg_target_long imm) 419{ 420 /* This function is only used for passing structs by reference. */ 421 g_assert_not_reached(); 422} 423 424static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) 425{ 426 tcg_out_opc_andi(s, ret, arg, 0xff); 427} 428 429static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) 430{ 431 tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); 432} 433 434static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) 435{ 436 tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); 437} 438 439static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 440{ 441 tcg_out_opc_sext_b(s, ret, arg); 442} 443 444static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 445{ 446 tcg_out_opc_sext_h(s, ret, arg); 447} 448 449static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) 450{ 451 tcg_out_opc_addi_w(s, ret, arg, 0); 452} 453 454static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 455{ 456 if (ret != arg) { 457 tcg_out_ext32s(s, ret, arg); 458 } 459} 460 461static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) 462{ 463 tcg_out_ext32u(s, ret, arg); 464} 465 466static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) 467{ 468 tcg_out_ext32s(s, ret, arg); 469} 470 471static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, 472 TCGReg a0, TCGReg a1, TCGReg a2, 473 bool c2, bool is_32bit) 474{ 475 if (c2) { 476 /* 477 * Fast path: semantics already satisfied due to constraint and 478 * insn behavior, single instruction is enough. 479 */ 480 tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); 481 /* all clz/ctz insns belong to DJ-format */ 482 tcg_out32(s, encode_dj_insn(opc, a0, a1)); 483 return; 484 } 485 486 tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); 487 /* a0 = a1 ? REG_TMP0 : a2 */ 488 tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); 489 tcg_out_opc_masknez(s, a0, a2, a1); 490 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); 491} 492 493#define SETCOND_INV TCG_TARGET_NB_REGS 494#define SETCOND_NEZ (SETCOND_INV << 1) 495#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) 496 497static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, 498 TCGReg arg1, tcg_target_long arg2, bool c2) 499{ 500 int flags = 0; 501 502 switch (cond) { 503 case TCG_COND_EQ: /* -> NE */ 504 case TCG_COND_GE: /* -> LT */ 505 case TCG_COND_GEU: /* -> LTU */ 506 case TCG_COND_GT: /* -> LE */ 507 case TCG_COND_GTU: /* -> LEU */ 508 cond = tcg_invert_cond(cond); 509 flags ^= SETCOND_INV; 510 break; 511 default: 512 break; 513 } 514 515 switch (cond) { 516 case TCG_COND_LE: 517 case TCG_COND_LEU: 518 /* 519 * If we have a constant input, the most efficient way to implement 520 * LE is by adding 1 and using LT. Watch out for wrap around for LEU. 521 * We don't need to care for this for LE because the constant input 522 * is still constrained to int32_t, and INT32_MAX+1 is representable 523 * in the 64-bit temporary register. 524 */ 525 if (c2) { 526 if (cond == TCG_COND_LEU) { 527 /* unsigned <= -1 is true */ 528 if (arg2 == -1) { 529 tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); 530 return ret; 531 } 532 cond = TCG_COND_LTU; 533 } else { 534 cond = TCG_COND_LT; 535 } 536 arg2 += 1; 537 } else { 538 TCGReg tmp = arg2; 539 arg2 = arg1; 540 arg1 = tmp; 541 cond = tcg_swap_cond(cond); /* LE -> GE */ 542 cond = tcg_invert_cond(cond); /* GE -> LT */ 543 flags ^= SETCOND_INV; 544 } 545 break; 546 default: 547 break; 548 } 549 550 switch (cond) { 551 case TCG_COND_NE: 552 flags |= SETCOND_NEZ; 553 if (!c2) { 554 tcg_out_opc_xor(s, ret, arg1, arg2); 555 } else if (arg2 == 0) { 556 ret = arg1; 557 } else if (arg2 >= 0 && arg2 <= 0xfff) { 558 tcg_out_opc_xori(s, ret, arg1, arg2); 559 } else { 560 tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); 561 } 562 break; 563 564 case TCG_COND_LT: 565 case TCG_COND_LTU: 566 if (c2) { 567 if (arg2 >= -0x800 && arg2 <= 0x7ff) { 568 if (cond == TCG_COND_LT) { 569 tcg_out_opc_slti(s, ret, arg1, arg2); 570 } else { 571 tcg_out_opc_sltui(s, ret, arg1, arg2); 572 } 573 break; 574 } 575 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); 576 arg2 = TCG_REG_TMP0; 577 } 578 if (cond == TCG_COND_LT) { 579 tcg_out_opc_slt(s, ret, arg1, arg2); 580 } else { 581 tcg_out_opc_sltu(s, ret, arg1, arg2); 582 } 583 break; 584 585 default: 586 g_assert_not_reached(); 587 break; 588 } 589 590 return ret | flags; 591} 592 593static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, 594 TCGReg arg1, tcg_target_long arg2, bool c2) 595{ 596 int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); 597 598 if (tmpflags != ret) { 599 TCGReg tmp = tmpflags & ~SETCOND_FLAGS; 600 601 switch (tmpflags & SETCOND_FLAGS) { 602 case SETCOND_INV: 603 /* Intermediate result is boolean: simply invert. */ 604 tcg_out_opc_xori(s, ret, tmp, 1); 605 break; 606 case SETCOND_NEZ: 607 /* Intermediate result is zero/non-zero: test != 0. */ 608 tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); 609 break; 610 case SETCOND_NEZ | SETCOND_INV: 611 /* Intermediate result is zero/non-zero: test == 0. */ 612 tcg_out_opc_sltui(s, ret, tmp, 1); 613 break; 614 default: 615 g_assert_not_reached(); 616 } 617 } 618} 619 620static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, 621 TCGReg c1, tcg_target_long c2, bool const2, 622 TCGReg v1, TCGReg v2) 623{ 624 int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); 625 TCGReg t; 626 627 /* Standardize the test below to t != 0. */ 628 if (tmpflags & SETCOND_INV) { 629 t = v1, v1 = v2, v2 = t; 630 } 631 632 t = tmpflags & ~SETCOND_FLAGS; 633 if (v1 == TCG_REG_ZERO) { 634 tcg_out_opc_masknez(s, ret, v2, t); 635 } else if (v2 == TCG_REG_ZERO) { 636 tcg_out_opc_maskeqz(s, ret, v1, t); 637 } else { 638 tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ 639 tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */ 640 tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); 641 } 642} 643 644/* 645 * Branch helpers 646 */ 647 648static const struct { 649 LoongArchInsn op; 650 bool swap; 651} tcg_brcond_to_loongarch[] = { 652 [TCG_COND_EQ] = { OPC_BEQ, false }, 653 [TCG_COND_NE] = { OPC_BNE, false }, 654 [TCG_COND_LT] = { OPC_BGT, true }, 655 [TCG_COND_GE] = { OPC_BLE, true }, 656 [TCG_COND_LE] = { OPC_BLE, false }, 657 [TCG_COND_GT] = { OPC_BGT, false }, 658 [TCG_COND_LTU] = { OPC_BGTU, true }, 659 [TCG_COND_GEU] = { OPC_BLEU, true }, 660 [TCG_COND_LEU] = { OPC_BLEU, false }, 661 [TCG_COND_GTU] = { OPC_BGTU, false } 662}; 663 664static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, 665 TCGReg arg2, TCGLabel *l) 666{ 667 LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; 668 669 tcg_debug_assert(op != 0); 670 671 if (tcg_brcond_to_loongarch[cond].swap) { 672 TCGReg t = arg1; 673 arg1 = arg2; 674 arg2 = t; 675 } 676 677 /* all conditional branch insns belong to DJSk16-format */ 678 tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0); 679 tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); 680} 681 682static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) 683{ 684 TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; 685 ptrdiff_t offset = tcg_pcrel_diff(s, arg); 686 687 tcg_debug_assert((offset & 3) == 0); 688 if (offset == sextreg(offset, 0, 28)) { 689 /* short jump: +/- 256MiB */ 690 if (tail) { 691 tcg_out_opc_b(s, offset >> 2); 692 } else { 693 tcg_out_opc_bl(s, offset >> 2); 694 } 695 } else if (offset == sextreg(offset, 0, 38)) { 696 /* long jump: +/- 256GiB */ 697 tcg_target_long lo = sextreg(offset, 0, 18); 698 tcg_target_long hi = offset - lo; 699 tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); 700 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 701 } else { 702 /* far jump: 64-bit */ 703 tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); 704 tcg_target_long hi = (tcg_target_long)arg - lo; 705 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); 706 tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); 707 } 708} 709 710static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, 711 const TCGHelperInfo *info) 712{ 713 tcg_out_call_int(s, arg, false); 714} 715 716/* 717 * Load/store helpers 718 */ 719 720static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, 721 TCGReg addr, intptr_t offset) 722{ 723 intptr_t imm12 = sextreg(offset, 0, 12); 724 725 if (offset != imm12) { 726 intptr_t diff = tcg_pcrel_diff(s, (void *)offset); 727 728 if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { 729 imm12 = sextreg(diff, 0, 12); 730 tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); 731 } else { 732 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); 733 if (addr != TCG_REG_ZERO) { 734 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); 735 } 736 } 737 addr = TCG_REG_TMP2; 738 } 739 740 switch (opc) { 741 case OPC_LD_B: 742 case OPC_LD_BU: 743 case OPC_LD_H: 744 case OPC_LD_HU: 745 case OPC_LD_W: 746 case OPC_LD_WU: 747 case OPC_LD_D: 748 case OPC_ST_B: 749 case OPC_ST_H: 750 case OPC_ST_W: 751 case OPC_ST_D: 752 tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); 753 break; 754 default: 755 g_assert_not_reached(); 756 } 757} 758 759static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, 760 TCGReg arg1, intptr_t arg2) 761{ 762 bool is_32bit = type == TCG_TYPE_I32; 763 tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); 764} 765 766static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 767 TCGReg arg1, intptr_t arg2) 768{ 769 bool is_32bit = type == TCG_TYPE_I32; 770 tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2); 771} 772 773static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 774 TCGReg base, intptr_t ofs) 775{ 776 if (val == 0) { 777 tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); 778 return true; 779 } 780 return false; 781} 782 783/* 784 * Load/store helpers for SoftMMU, and qemu_ld/st implementations 785 */ 786 787static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 788{ 789 tcg_out_opc_b(s, 0); 790 return reloc_br_sd10k16(s->code_ptr - 1, target); 791} 792 793static const TCGLdstHelperParam ldst_helper_param = { 794 .ntmp = 1, .tmp = { TCG_REG_TMP0 } 795}; 796 797static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 798{ 799 MemOp opc = get_memop(l->oi); 800 801 /* resolve label address */ 802 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 803 return false; 804 } 805 806 tcg_out_ld_helper_args(s, l, &ldst_helper_param); 807 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false); 808 tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); 809 return tcg_out_goto(s, l->raddr); 810} 811 812static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 813{ 814 MemOp opc = get_memop(l->oi); 815 816 /* resolve label address */ 817 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 818 return false; 819 } 820 821 tcg_out_st_helper_args(s, l, &ldst_helper_param); 822 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); 823 return tcg_out_goto(s, l->raddr); 824} 825 826typedef struct { 827 TCGReg base; 828 TCGReg index; 829} HostAddress; 830 831bool tcg_target_has_memory_bswap(MemOp memop) 832{ 833 return false; 834} 835 836/* 837 * For softmmu, perform the TLB load and compare. 838 * For useronly, perform any required alignment tests. 839 * In both cases, return a TCGLabelQemuLdst structure if the slow path 840 * is required and fill in @h with the host address for the fast path. 841 */ 842static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 843 TCGReg addr_reg, MemOpIdx oi, 844 bool is_ld) 845{ 846 TCGLabelQemuLdst *ldst = NULL; 847 MemOp opc = get_memop(oi); 848 unsigned a_bits = get_alignment_bits(opc); 849 850#ifdef CONFIG_SOFTMMU 851 unsigned s_bits = opc & MO_SIZE; 852 int mem_index = get_mmuidx(oi); 853 int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); 854 int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); 855 int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); 856 857 ldst = new_ldst_label(s); 858 ldst->is_ld = is_ld; 859 ldst->oi = oi; 860 ldst->addrlo_reg = addr_reg; 861 862 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 863 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); 864 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); 865 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); 866 867 tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, 868 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 869 tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); 870 tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); 871 872 /* Load the tlb comparator and the addend. */ 873 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, 874 is_ld ? offsetof(CPUTLBEntry, addr_read) 875 : offsetof(CPUTLBEntry, addr_write)); 876 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, 877 offsetof(CPUTLBEntry, addend)); 878 879 /* 880 * For aligned accesses, we check the first byte and include the alignment 881 * bits within the address. For unaligned access, we check that we don't 882 * cross pages using the address of the last byte of the access. 883 */ 884 if (a_bits < s_bits) { 885 unsigned a_mask = (1u << a_bits) - 1; 886 unsigned s_mask = (1u << s_bits) - 1; 887 tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask); 888 } else { 889 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg); 890 } 891 tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, 892 a_bits, TARGET_PAGE_BITS - 1); 893 894 /* Compare masked address with the TLB entry. */ 895 ldst->label_ptr[0] = s->code_ptr; 896 tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); 897 898 h->index = TCG_REG_TMP2; 899#else 900 if (a_bits) { 901 ldst = new_ldst_label(s); 902 903 ldst->is_ld = is_ld; 904 ldst->oi = oi; 905 ldst->addrlo_reg = addr_reg; 906 907 /* 908 * Without micro-architecture details, we don't know which of 909 * bstrpick or andi is faster, so use bstrpick as it's not 910 * constrained by imm field width. Not to say alignments >= 2^12 911 * are going to happen any time soon. 912 */ 913 tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); 914 915 ldst->label_ptr[0] = s->code_ptr; 916 tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); 917 } 918 919 h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; 920#endif 921 922 if (TARGET_LONG_BITS == 32) { 923 h->base = TCG_REG_TMP0; 924 tcg_out_ext32u(s, h->base, addr_reg); 925 } else { 926 h->base = addr_reg; 927 } 928 929 return ldst; 930} 931 932static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, 933 TCGReg rd, HostAddress h) 934{ 935 /* Byte swapping is left to middle-end expansion. */ 936 tcg_debug_assert((opc & MO_BSWAP) == 0); 937 938 switch (opc & MO_SSIZE) { 939 case MO_UB: 940 tcg_out_opc_ldx_bu(s, rd, h.base, h.index); 941 break; 942 case MO_SB: 943 tcg_out_opc_ldx_b(s, rd, h.base, h.index); 944 break; 945 case MO_UW: 946 tcg_out_opc_ldx_hu(s, rd, h.base, h.index); 947 break; 948 case MO_SW: 949 tcg_out_opc_ldx_h(s, rd, h.base, h.index); 950 break; 951 case MO_UL: 952 if (type == TCG_TYPE_I64) { 953 tcg_out_opc_ldx_wu(s, rd, h.base, h.index); 954 break; 955 } 956 /* fallthrough */ 957 case MO_SL: 958 tcg_out_opc_ldx_w(s, rd, h.base, h.index); 959 break; 960 case MO_UQ: 961 tcg_out_opc_ldx_d(s, rd, h.base, h.index); 962 break; 963 default: 964 g_assert_not_reached(); 965 } 966} 967 968static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 969 MemOpIdx oi, TCGType data_type) 970{ 971 TCGLabelQemuLdst *ldst; 972 HostAddress h; 973 974 ldst = prepare_host_addr(s, &h, addr_reg, oi, true); 975 tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); 976 977 if (ldst) { 978 ldst->type = data_type; 979 ldst->datalo_reg = data_reg; 980 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 981 } 982} 983 984static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, 985 TCGReg rd, HostAddress h) 986{ 987 /* Byte swapping is left to middle-end expansion. */ 988 tcg_debug_assert((opc & MO_BSWAP) == 0); 989 990 switch (opc & MO_SIZE) { 991 case MO_8: 992 tcg_out_opc_stx_b(s, rd, h.base, h.index); 993 break; 994 case MO_16: 995 tcg_out_opc_stx_h(s, rd, h.base, h.index); 996 break; 997 case MO_32: 998 tcg_out_opc_stx_w(s, rd, h.base, h.index); 999 break; 1000 case MO_64: 1001 tcg_out_opc_stx_d(s, rd, h.base, h.index); 1002 break; 1003 default: 1004 g_assert_not_reached(); 1005 } 1006} 1007 1008static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1009 MemOpIdx oi, TCGType data_type) 1010{ 1011 TCGLabelQemuLdst *ldst; 1012 HostAddress h; 1013 1014 ldst = prepare_host_addr(s, &h, addr_reg, oi, false); 1015 tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); 1016 1017 if (ldst) { 1018 ldst->type = data_type; 1019 ldst->datalo_reg = data_reg; 1020 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1021 } 1022} 1023 1024/* 1025 * Entry-points 1026 */ 1027 1028static const tcg_insn_unit *tb_ret_addr; 1029 1030static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1031{ 1032 /* Reuse the zeroing that exists for goto_ptr. */ 1033 if (a0 == 0) { 1034 tcg_out_call_int(s, tcg_code_gen_epilogue, true); 1035 } else { 1036 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); 1037 tcg_out_call_int(s, tb_ret_addr, true); 1038 } 1039} 1040 1041static void tcg_out_goto_tb(TCGContext *s, int which) 1042{ 1043 /* 1044 * Direct branch, or load indirect address, to be patched 1045 * by tb_target_set_jmp_target. Check indirect load offset 1046 * in range early, regardless of direct branch distance, 1047 * via assert within tcg_out_opc_pcaddu2i. 1048 */ 1049 uintptr_t i_addr = get_jmp_target_addr(s, which); 1050 intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr); 1051 1052 set_jmp_insn_offset(s, which); 1053 tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2); 1054 1055 /* Finish the load and indirect branch. */ 1056 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0); 1057 tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); 1058 set_jmp_reset_offset(s, which); 1059} 1060 1061void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 1062 uintptr_t jmp_rx, uintptr_t jmp_rw) 1063{ 1064 uintptr_t d_addr = tb->jmp_target_addr[n]; 1065 ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2; 1066 tcg_insn_unit insn; 1067 1068 /* Either directly branch, or load slot address for indirect branch. */ 1069 if (d_disp == sextreg(d_disp, 0, 26)) { 1070 insn = encode_sd10k16_insn(OPC_B, d_disp); 1071 } else { 1072 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 1073 intptr_t i_disp = i_addr - jmp_rx; 1074 insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2); 1075 } 1076 1077 qatomic_set((tcg_insn_unit *)jmp_rw, insn); 1078 flush_idcache_range(jmp_rx, jmp_rw, 4); 1079} 1080 1081static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1082 const TCGArg args[TCG_MAX_OP_ARGS], 1083 const int const_args[TCG_MAX_OP_ARGS]) 1084{ 1085 TCGArg a0 = args[0]; 1086 TCGArg a1 = args[1]; 1087 TCGArg a2 = args[2]; 1088 int c2 = const_args[2]; 1089 1090 switch (opc) { 1091 case INDEX_op_mb: 1092 tcg_out_mb(s, a0); 1093 break; 1094 1095 case INDEX_op_goto_ptr: 1096 tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); 1097 break; 1098 1099 case INDEX_op_br: 1100 tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), 1101 0); 1102 tcg_out_opc_b(s, 0); 1103 break; 1104 1105 case INDEX_op_brcond_i32: 1106 case INDEX_op_brcond_i64: 1107 tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); 1108 break; 1109 1110 case INDEX_op_extrh_i64_i32: 1111 tcg_out_opc_srai_d(s, a0, a1, 32); 1112 break; 1113 1114 case INDEX_op_not_i32: 1115 case INDEX_op_not_i64: 1116 tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); 1117 break; 1118 1119 case INDEX_op_nor_i32: 1120 case INDEX_op_nor_i64: 1121 if (c2) { 1122 tcg_out_opc_ori(s, a0, a1, a2); 1123 tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); 1124 } else { 1125 tcg_out_opc_nor(s, a0, a1, a2); 1126 } 1127 break; 1128 1129 case INDEX_op_andc_i32: 1130 case INDEX_op_andc_i64: 1131 if (c2) { 1132 /* guaranteed to fit due to constraint */ 1133 tcg_out_opc_andi(s, a0, a1, ~a2); 1134 } else { 1135 tcg_out_opc_andn(s, a0, a1, a2); 1136 } 1137 break; 1138 1139 case INDEX_op_orc_i32: 1140 case INDEX_op_orc_i64: 1141 if (c2) { 1142 /* guaranteed to fit due to constraint */ 1143 tcg_out_opc_ori(s, a0, a1, ~a2); 1144 } else { 1145 tcg_out_opc_orn(s, a0, a1, a2); 1146 } 1147 break; 1148 1149 case INDEX_op_and_i32: 1150 case INDEX_op_and_i64: 1151 if (c2) { 1152 tcg_out_opc_andi(s, a0, a1, a2); 1153 } else { 1154 tcg_out_opc_and(s, a0, a1, a2); 1155 } 1156 break; 1157 1158 case INDEX_op_or_i32: 1159 case INDEX_op_or_i64: 1160 if (c2) { 1161 tcg_out_opc_ori(s, a0, a1, a2); 1162 } else { 1163 tcg_out_opc_or(s, a0, a1, a2); 1164 } 1165 break; 1166 1167 case INDEX_op_xor_i32: 1168 case INDEX_op_xor_i64: 1169 if (c2) { 1170 tcg_out_opc_xori(s, a0, a1, a2); 1171 } else { 1172 tcg_out_opc_xor(s, a0, a1, a2); 1173 } 1174 break; 1175 1176 case INDEX_op_extract_i32: 1177 tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); 1178 break; 1179 case INDEX_op_extract_i64: 1180 tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); 1181 break; 1182 1183 case INDEX_op_deposit_i32: 1184 tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); 1185 break; 1186 case INDEX_op_deposit_i64: 1187 tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); 1188 break; 1189 1190 case INDEX_op_bswap16_i32: 1191 case INDEX_op_bswap16_i64: 1192 tcg_out_opc_revb_2h(s, a0, a1); 1193 if (a2 & TCG_BSWAP_OS) { 1194 tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0); 1195 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1196 tcg_out_ext16u(s, a0, a0); 1197 } 1198 break; 1199 1200 case INDEX_op_bswap32_i32: 1201 /* All 32-bit values are computed sign-extended in the register. */ 1202 a2 = TCG_BSWAP_OS; 1203 /* fallthrough */ 1204 case INDEX_op_bswap32_i64: 1205 tcg_out_opc_revb_2w(s, a0, a1); 1206 if (a2 & TCG_BSWAP_OS) { 1207 tcg_out_ext32s(s, a0, a0); 1208 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1209 tcg_out_ext32u(s, a0, a0); 1210 } 1211 break; 1212 1213 case INDEX_op_bswap64_i64: 1214 tcg_out_opc_revb_d(s, a0, a1); 1215 break; 1216 1217 case INDEX_op_clz_i32: 1218 tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); 1219 break; 1220 case INDEX_op_clz_i64: 1221 tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); 1222 break; 1223 1224 case INDEX_op_ctz_i32: 1225 tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); 1226 break; 1227 case INDEX_op_ctz_i64: 1228 tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); 1229 break; 1230 1231 case INDEX_op_shl_i32: 1232 if (c2) { 1233 tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); 1234 } else { 1235 tcg_out_opc_sll_w(s, a0, a1, a2); 1236 } 1237 break; 1238 case INDEX_op_shl_i64: 1239 if (c2) { 1240 tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); 1241 } else { 1242 tcg_out_opc_sll_d(s, a0, a1, a2); 1243 } 1244 break; 1245 1246 case INDEX_op_shr_i32: 1247 if (c2) { 1248 tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); 1249 } else { 1250 tcg_out_opc_srl_w(s, a0, a1, a2); 1251 } 1252 break; 1253 case INDEX_op_shr_i64: 1254 if (c2) { 1255 tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); 1256 } else { 1257 tcg_out_opc_srl_d(s, a0, a1, a2); 1258 } 1259 break; 1260 1261 case INDEX_op_sar_i32: 1262 if (c2) { 1263 tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); 1264 } else { 1265 tcg_out_opc_sra_w(s, a0, a1, a2); 1266 } 1267 break; 1268 case INDEX_op_sar_i64: 1269 if (c2) { 1270 tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); 1271 } else { 1272 tcg_out_opc_sra_d(s, a0, a1, a2); 1273 } 1274 break; 1275 1276 case INDEX_op_rotl_i32: 1277 /* transform into equivalent rotr/rotri */ 1278 if (c2) { 1279 tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); 1280 } else { 1281 tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); 1282 tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); 1283 } 1284 break; 1285 case INDEX_op_rotl_i64: 1286 /* transform into equivalent rotr/rotri */ 1287 if (c2) { 1288 tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); 1289 } else { 1290 tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); 1291 tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); 1292 } 1293 break; 1294 1295 case INDEX_op_rotr_i32: 1296 if (c2) { 1297 tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); 1298 } else { 1299 tcg_out_opc_rotr_w(s, a0, a1, a2); 1300 } 1301 break; 1302 case INDEX_op_rotr_i64: 1303 if (c2) { 1304 tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); 1305 } else { 1306 tcg_out_opc_rotr_d(s, a0, a1, a2); 1307 } 1308 break; 1309 1310 case INDEX_op_add_i32: 1311 if (c2) { 1312 tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); 1313 } else { 1314 tcg_out_opc_add_w(s, a0, a1, a2); 1315 } 1316 break; 1317 case INDEX_op_add_i64: 1318 if (c2) { 1319 tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); 1320 } else { 1321 tcg_out_opc_add_d(s, a0, a1, a2); 1322 } 1323 break; 1324 1325 case INDEX_op_sub_i32: 1326 if (c2) { 1327 tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); 1328 } else { 1329 tcg_out_opc_sub_w(s, a0, a1, a2); 1330 } 1331 break; 1332 case INDEX_op_sub_i64: 1333 if (c2) { 1334 tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); 1335 } else { 1336 tcg_out_opc_sub_d(s, a0, a1, a2); 1337 } 1338 break; 1339 1340 case INDEX_op_mul_i32: 1341 tcg_out_opc_mul_w(s, a0, a1, a2); 1342 break; 1343 case INDEX_op_mul_i64: 1344 tcg_out_opc_mul_d(s, a0, a1, a2); 1345 break; 1346 1347 case INDEX_op_mulsh_i32: 1348 tcg_out_opc_mulh_w(s, a0, a1, a2); 1349 break; 1350 case INDEX_op_mulsh_i64: 1351 tcg_out_opc_mulh_d(s, a0, a1, a2); 1352 break; 1353 1354 case INDEX_op_muluh_i32: 1355 tcg_out_opc_mulh_wu(s, a0, a1, a2); 1356 break; 1357 case INDEX_op_muluh_i64: 1358 tcg_out_opc_mulh_du(s, a0, a1, a2); 1359 break; 1360 1361 case INDEX_op_div_i32: 1362 tcg_out_opc_div_w(s, a0, a1, a2); 1363 break; 1364 case INDEX_op_div_i64: 1365 tcg_out_opc_div_d(s, a0, a1, a2); 1366 break; 1367 1368 case INDEX_op_divu_i32: 1369 tcg_out_opc_div_wu(s, a0, a1, a2); 1370 break; 1371 case INDEX_op_divu_i64: 1372 tcg_out_opc_div_du(s, a0, a1, a2); 1373 break; 1374 1375 case INDEX_op_rem_i32: 1376 tcg_out_opc_mod_w(s, a0, a1, a2); 1377 break; 1378 case INDEX_op_rem_i64: 1379 tcg_out_opc_mod_d(s, a0, a1, a2); 1380 break; 1381 1382 case INDEX_op_remu_i32: 1383 tcg_out_opc_mod_wu(s, a0, a1, a2); 1384 break; 1385 case INDEX_op_remu_i64: 1386 tcg_out_opc_mod_du(s, a0, a1, a2); 1387 break; 1388 1389 case INDEX_op_setcond_i32: 1390 case INDEX_op_setcond_i64: 1391 tcg_out_setcond(s, args[3], a0, a1, a2, c2); 1392 break; 1393 1394 case INDEX_op_movcond_i32: 1395 case INDEX_op_movcond_i64: 1396 tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); 1397 break; 1398 1399 case INDEX_op_ld8s_i32: 1400 case INDEX_op_ld8s_i64: 1401 tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); 1402 break; 1403 case INDEX_op_ld8u_i32: 1404 case INDEX_op_ld8u_i64: 1405 tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); 1406 break; 1407 case INDEX_op_ld16s_i32: 1408 case INDEX_op_ld16s_i64: 1409 tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); 1410 break; 1411 case INDEX_op_ld16u_i32: 1412 case INDEX_op_ld16u_i64: 1413 tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); 1414 break; 1415 case INDEX_op_ld_i32: 1416 case INDEX_op_ld32s_i64: 1417 tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); 1418 break; 1419 case INDEX_op_ld32u_i64: 1420 tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); 1421 break; 1422 case INDEX_op_ld_i64: 1423 tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); 1424 break; 1425 1426 case INDEX_op_st8_i32: 1427 case INDEX_op_st8_i64: 1428 tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); 1429 break; 1430 case INDEX_op_st16_i32: 1431 case INDEX_op_st16_i64: 1432 tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); 1433 break; 1434 case INDEX_op_st_i32: 1435 case INDEX_op_st32_i64: 1436 tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); 1437 break; 1438 case INDEX_op_st_i64: 1439 tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); 1440 break; 1441 1442 case INDEX_op_qemu_ld_i32: 1443 tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); 1444 break; 1445 case INDEX_op_qemu_ld_i64: 1446 tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); 1447 break; 1448 case INDEX_op_qemu_st_i32: 1449 tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); 1450 break; 1451 case INDEX_op_qemu_st_i64: 1452 tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); 1453 break; 1454 1455 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 1456 case INDEX_op_mov_i64: 1457 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 1458 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 1459 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 1460 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 1461 case INDEX_op_ext8s_i64: 1462 case INDEX_op_ext8u_i32: 1463 case INDEX_op_ext8u_i64: 1464 case INDEX_op_ext16s_i32: 1465 case INDEX_op_ext16s_i64: 1466 case INDEX_op_ext16u_i32: 1467 case INDEX_op_ext16u_i64: 1468 case INDEX_op_ext32s_i64: 1469 case INDEX_op_ext32u_i64: 1470 case INDEX_op_ext_i32_i64: 1471 case INDEX_op_extu_i32_i64: 1472 case INDEX_op_extrl_i64_i32: 1473 default: 1474 g_assert_not_reached(); 1475 } 1476} 1477 1478static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 1479{ 1480 switch (op) { 1481 case INDEX_op_goto_ptr: 1482 return C_O0_I1(r); 1483 1484 case INDEX_op_st8_i32: 1485 case INDEX_op_st8_i64: 1486 case INDEX_op_st16_i32: 1487 case INDEX_op_st16_i64: 1488 case INDEX_op_st32_i64: 1489 case INDEX_op_st_i32: 1490 case INDEX_op_st_i64: 1491 case INDEX_op_qemu_st_i32: 1492 case INDEX_op_qemu_st_i64: 1493 return C_O0_I2(rZ, r); 1494 1495 case INDEX_op_brcond_i32: 1496 case INDEX_op_brcond_i64: 1497 return C_O0_I2(rZ, rZ); 1498 1499 case INDEX_op_ext8s_i32: 1500 case INDEX_op_ext8s_i64: 1501 case INDEX_op_ext8u_i32: 1502 case INDEX_op_ext8u_i64: 1503 case INDEX_op_ext16s_i32: 1504 case INDEX_op_ext16s_i64: 1505 case INDEX_op_ext16u_i32: 1506 case INDEX_op_ext16u_i64: 1507 case INDEX_op_ext32s_i64: 1508 case INDEX_op_ext32u_i64: 1509 case INDEX_op_extu_i32_i64: 1510 case INDEX_op_extrl_i64_i32: 1511 case INDEX_op_extrh_i64_i32: 1512 case INDEX_op_ext_i32_i64: 1513 case INDEX_op_not_i32: 1514 case INDEX_op_not_i64: 1515 case INDEX_op_extract_i32: 1516 case INDEX_op_extract_i64: 1517 case INDEX_op_bswap16_i32: 1518 case INDEX_op_bswap16_i64: 1519 case INDEX_op_bswap32_i32: 1520 case INDEX_op_bswap32_i64: 1521 case INDEX_op_bswap64_i64: 1522 case INDEX_op_ld8s_i32: 1523 case INDEX_op_ld8s_i64: 1524 case INDEX_op_ld8u_i32: 1525 case INDEX_op_ld8u_i64: 1526 case INDEX_op_ld16s_i32: 1527 case INDEX_op_ld16s_i64: 1528 case INDEX_op_ld16u_i32: 1529 case INDEX_op_ld16u_i64: 1530 case INDEX_op_ld32s_i64: 1531 case INDEX_op_ld32u_i64: 1532 case INDEX_op_ld_i32: 1533 case INDEX_op_ld_i64: 1534 case INDEX_op_qemu_ld_i32: 1535 case INDEX_op_qemu_ld_i64: 1536 return C_O1_I1(r, r); 1537 1538 case INDEX_op_andc_i32: 1539 case INDEX_op_andc_i64: 1540 case INDEX_op_orc_i32: 1541 case INDEX_op_orc_i64: 1542 /* 1543 * LoongArch insns for these ops don't have reg-imm forms, but we 1544 * can express using andi/ori if ~constant satisfies 1545 * TCG_CT_CONST_U12. 1546 */ 1547 return C_O1_I2(r, r, rC); 1548 1549 case INDEX_op_shl_i32: 1550 case INDEX_op_shl_i64: 1551 case INDEX_op_shr_i32: 1552 case INDEX_op_shr_i64: 1553 case INDEX_op_sar_i32: 1554 case INDEX_op_sar_i64: 1555 case INDEX_op_rotl_i32: 1556 case INDEX_op_rotl_i64: 1557 case INDEX_op_rotr_i32: 1558 case INDEX_op_rotr_i64: 1559 return C_O1_I2(r, r, ri); 1560 1561 case INDEX_op_add_i32: 1562 return C_O1_I2(r, r, ri); 1563 case INDEX_op_add_i64: 1564 return C_O1_I2(r, r, rJ); 1565 1566 case INDEX_op_and_i32: 1567 case INDEX_op_and_i64: 1568 case INDEX_op_nor_i32: 1569 case INDEX_op_nor_i64: 1570 case INDEX_op_or_i32: 1571 case INDEX_op_or_i64: 1572 case INDEX_op_xor_i32: 1573 case INDEX_op_xor_i64: 1574 /* LoongArch reg-imm bitops have their imms ZERO-extended */ 1575 return C_O1_I2(r, r, rU); 1576 1577 case INDEX_op_clz_i32: 1578 case INDEX_op_clz_i64: 1579 case INDEX_op_ctz_i32: 1580 case INDEX_op_ctz_i64: 1581 return C_O1_I2(r, r, rW); 1582 1583 case INDEX_op_deposit_i32: 1584 case INDEX_op_deposit_i64: 1585 /* Must deposit into the same register as input */ 1586 return C_O1_I2(r, 0, rZ); 1587 1588 case INDEX_op_sub_i32: 1589 case INDEX_op_setcond_i32: 1590 return C_O1_I2(r, rZ, ri); 1591 case INDEX_op_sub_i64: 1592 case INDEX_op_setcond_i64: 1593 return C_O1_I2(r, rZ, rJ); 1594 1595 case INDEX_op_mul_i32: 1596 case INDEX_op_mul_i64: 1597 case INDEX_op_mulsh_i32: 1598 case INDEX_op_mulsh_i64: 1599 case INDEX_op_muluh_i32: 1600 case INDEX_op_muluh_i64: 1601 case INDEX_op_div_i32: 1602 case INDEX_op_div_i64: 1603 case INDEX_op_divu_i32: 1604 case INDEX_op_divu_i64: 1605 case INDEX_op_rem_i32: 1606 case INDEX_op_rem_i64: 1607 case INDEX_op_remu_i32: 1608 case INDEX_op_remu_i64: 1609 return C_O1_I2(r, rZ, rZ); 1610 1611 case INDEX_op_movcond_i32: 1612 case INDEX_op_movcond_i64: 1613 return C_O1_I4(r, rZ, rJ, rZ, rZ); 1614 1615 default: 1616 g_assert_not_reached(); 1617 } 1618} 1619 1620static const int tcg_target_callee_save_regs[] = { 1621 TCG_REG_S0, /* used for the global env (TCG_AREG0) */ 1622 TCG_REG_S1, 1623 TCG_REG_S2, 1624 TCG_REG_S3, 1625 TCG_REG_S4, 1626 TCG_REG_S5, 1627 TCG_REG_S6, 1628 TCG_REG_S7, 1629 TCG_REG_S8, 1630 TCG_REG_S9, 1631 TCG_REG_RA, /* should be last for ABI compliance */ 1632}; 1633 1634/* Stack frame parameters. */ 1635#define REG_SIZE (TCG_TARGET_REG_BITS / 8) 1636#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) 1637#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 1638#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ 1639 + TCG_TARGET_STACK_ALIGN - 1) \ 1640 & -TCG_TARGET_STACK_ALIGN) 1641#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) 1642 1643/* We're expecting to be able to use an immediate for frame allocation. */ 1644QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff); 1645 1646/* Generate global QEMU prologue and epilogue code */ 1647static void tcg_target_qemu_prologue(TCGContext *s) 1648{ 1649 int i; 1650 1651 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE); 1652 1653 /* TB prologue */ 1654 tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE); 1655 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 1656 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 1657 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 1658 } 1659 1660#if !defined(CONFIG_SOFTMMU) 1661 if (USE_GUEST_BASE) { 1662 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); 1663 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 1664 } 1665#endif 1666 1667 /* Call generated code */ 1668 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 1669 tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0); 1670 1671 /* Return path for goto_ptr. Set return value to 0 */ 1672 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 1673 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO); 1674 1675 /* TB epilogue */ 1676 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 1677 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { 1678 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 1679 TCG_REG_SP, SAVE_OFS + i * REG_SIZE); 1680 } 1681 1682 tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE); 1683 tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0); 1684} 1685 1686static void tcg_target_init(TCGContext *s) 1687{ 1688 unsigned long hwcap = qemu_getauxval(AT_HWCAP); 1689 1690 /* Server and desktop class cpus have UAL; embedded cpus do not. */ 1691 if (!(hwcap & HWCAP_LOONGARCH_UAL)) { 1692 error_report("TCG: unaligned access support required; exiting"); 1693 exit(EXIT_FAILURE); 1694 } 1695 1696 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; 1697 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; 1698 1699 tcg_target_call_clobber_regs = ALL_GENERAL_REGS; 1700 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0); 1701 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1); 1702 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2); 1703 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3); 1704 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4); 1705 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5); 1706 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6); 1707 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7); 1708 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8); 1709 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9); 1710 1711 s->reserved_regs = 0; 1712 tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); 1713 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 1714 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 1715 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 1716 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 1717 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP); 1718 tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED); 1719} 1720 1721typedef struct { 1722 DebugFrameHeader h; 1723 uint8_t fde_def_cfa[4]; 1724 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; 1725} DebugFrame; 1726 1727#define ELF_HOST_MACHINE EM_LOONGARCH 1728 1729static const DebugFrame debug_frame = { 1730 .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */ 1731 .h.cie.id = -1, 1732 .h.cie.version = 1, 1733 .h.cie.code_align = 1, 1734 .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */ 1735 .h.cie.return_column = TCG_REG_RA, 1736 1737 /* Total FDE size does not include the "len" member. */ 1738 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 1739 1740 .fde_def_cfa = { 1741 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 1742 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 1743 (FRAME_SIZE >> 7) 1744 }, 1745 .fde_reg_ofs = { 1746 0x80 + 23, 11, /* DW_CFA_offset, s0, -88 */ 1747 0x80 + 24, 10, /* DW_CFA_offset, s1, -80 */ 1748 0x80 + 25, 9, /* DW_CFA_offset, s2, -72 */ 1749 0x80 + 26, 8, /* DW_CFA_offset, s3, -64 */ 1750 0x80 + 27, 7, /* DW_CFA_offset, s4, -56 */ 1751 0x80 + 28, 6, /* DW_CFA_offset, s5, -48 */ 1752 0x80 + 29, 5, /* DW_CFA_offset, s6, -40 */ 1753 0x80 + 30, 4, /* DW_CFA_offset, s7, -32 */ 1754 0x80 + 31, 3, /* DW_CFA_offset, s8, -24 */ 1755 0x80 + 22, 2, /* DW_CFA_offset, s9, -16 */ 1756 0x80 + 1 , 1, /* DW_CFA_offset, ra, -8 */ 1757 } 1758}; 1759 1760void tcg_register_jit(const void *buf, size_t buf_size) 1761{ 1762 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 1763} 1764