/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;                   /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;                 /* result type of a load */
    TCGReg addr_reg;              /* reg index for guest virtual addr */
    TCGReg datalo_reg;            /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;            /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_set_carry(TCGContext *s);
static void tcg_out_set_borrow(TCGContext *s);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
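
/*
 * Note on the helpers below (added summary): each tcg_outN() appends an
 * N-bit quantity to the generated code stream in units of
 * TCG_TARGET_INSN_UNIT_SIZE, while the matching tcg_patchN() rewrites a
 * previously emitted unit in place, e.g. when resolving relocations.
 */
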
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
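
/*
 * Illustrative example (not part of the interface): sign-extending a
 * 32-bit value into a 64-bit destination,
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 * reduces to tcg_out_exts_i32_i64(s, dst, src) via the MO_SL case above.
 */
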
/* Minor variations on a theme, using a structure.  */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
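
/*
 * Illustrative example (not part of the build): a line such as
 *     C_O1_I2(r, r, r)
 * in tcg-target-con-set.h expands to the enumerator c_o1_i2_r_r_r in
 * TCGConstraintSetIndex above and to the entry { 1, 2, { "r", "r", "r" } }
 * in constraint_sets[] here, so a single header line defines both the
 * index and the constraint strings.  The macros are redefined once more
 * below so that tcg_target_op_def() can return the matching enumerator.
 */
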
/* Expand the enumerator to be returned from tcg_target_op_def().  */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpAddSubCarry {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
    void (*out_rii)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, tcg_target_long a2);
} TCGOutOpAddSubCarry;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
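
/*
 * Illustrative expansion (for clarity only): the table entry
 *     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 * becomes
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 * so a backend declaring outop_add with any other type fails to compile
 * instead of silently registering a mismatched structure.
 */
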
/* Register allocation descriptions for every TCGOpcode.  */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
    OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
    /* addc1o is implemented with set_carry + addcio */
    OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
    OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
    /* subb1o is implemented with set_borrow + subbio */
    OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
#endif
};

#undef OUTOP
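
/*
 * Illustrative sketch (backend details vary; the constraint set and
 * function names here are examples only): each tcg-target.c.inc provides
 * the structures referenced above, e.g. something of the form
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, rI),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * so that all_outop[INDEX_op_add] yields both the constraint set used by
 * the register allocator and the emission callbacks for the opcode.
 */
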
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
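
/*
 * Summary of the allocator above (added note): requests larger than
 * TCG_POOL_CHUNK_SIZE get a dedicated pool chained on pool_first_large
 * and are freed by tcg_pool_reset(); smaller requests are carved out of
 * TCG_POOL_CHUNK_SIZE chunks, which stay on the pool_first list and are
 * reused for subsequent translations after a reset.
 */
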
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field.  */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }
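
    /*
     * Note on the encoding (added for clarity): typemask packs one 3-bit
     * typecode per slot, the return value in bits [2:0] and argument j in
     * bits [3*(j+1)+2 : 3*(j+1)], mirroring dh_typemask(type, n).  For
     * example, info_helper_st64_mmu above decodes as
     * void fn(env, uint64_t addr, uint64_t data, uint32_t oi, uintptr_t ra).
     */
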
    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
1813 */ 1814 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1815 { 1816 uintptr_t align = qemu_icache_linesize; 1817 TranslationBlock *tb; 1818 void *next; 1819 1820 retry: 1821 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1822 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1823 1824 if (unlikely(next > s->code_gen_highwater)) { 1825 if (tcg_region_alloc(s)) { 1826 return NULL; 1827 } 1828 goto retry; 1829 } 1830 qatomic_set(&s->code_gen_ptr, next); 1831 return tb; 1832 } 1833 1834 void tcg_prologue_init(void) 1835 { 1836 TCGContext *s = tcg_ctx; 1837 size_t prologue_size; 1838 1839 s->code_ptr = s->code_gen_ptr; 1840 s->code_buf = s->code_gen_ptr; 1841 s->data_gen_ptr = NULL; 1842 1843 #ifndef CONFIG_TCG_INTERPRETER 1844 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1845 #endif 1846 1847 s->pool_labels = NULL; 1848 1849 qemu_thread_jit_write(); 1850 /* Generate the prologue. */ 1851 tcg_target_qemu_prologue(s); 1852 1853 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1854 { 1855 int result = tcg_out_pool_finalize(s); 1856 tcg_debug_assert(result == 0); 1857 } 1858 1859 prologue_size = tcg_current_code_size(s); 1860 perf_report_prologue(s->code_gen_ptr, prologue_size); 1861 1862 #ifndef CONFIG_TCG_INTERPRETER 1863 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1864 (uintptr_t)s->code_buf, prologue_size); 1865 #endif 1866 1867 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1868 FILE *logfile = qemu_log_trylock(); 1869 if (logfile) { 1870 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1871 if (s->data_gen_ptr) { 1872 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1873 size_t data_size = prologue_size - code_size; 1874 size_t i; 1875 1876 disas(logfile, s->code_gen_ptr, code_size); 1877 1878 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1879 if (sizeof(tcg_target_ulong) == 8) { 1880 fprintf(logfile, 1881 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1882 (uintptr_t)s->data_gen_ptr + i, 1883 *(uint64_t *)(s->data_gen_ptr + i)); 1884 } else { 1885 fprintf(logfile, 1886 "0x%08" PRIxPTR ": .long 0x%08x\n", 1887 (uintptr_t)s->data_gen_ptr + i, 1888 *(uint32_t *)(s->data_gen_ptr + i)); 1889 } 1890 } 1891 } else { 1892 disas(logfile, s->code_gen_ptr, prologue_size); 1893 } 1894 fprintf(logfile, "\n"); 1895 qemu_log_unlock(logfile); 1896 } 1897 } 1898 1899 #ifndef CONFIG_TCG_INTERPRETER 1900 /* 1901 * Assert that goto_ptr is implemented completely, setting an epilogue. 1902 * For tci, we use NULL as the signal to return from the interpreter, 1903 * so skip this check. 1904 */ 1905 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1906 #endif 1907 1908 tcg_region_prologue_set(s); 1909 } 1910 1911 void tcg_func_start(TCGContext *s) 1912 { 1913 tcg_pool_reset(s); 1914 s->nb_temps = s->nb_globals; 1915 1916 /* No temps have been previously allocated for size or locality. */ 1917 tcg_temp_ebb_reset_freed(s); 1918 1919 /* No constant temps have been previously allocated. 
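   Each per-type hash table, created lazily by tcg_constant_internal()
   below, maps a value to its TEMP_CONST temporary; since nb_temps was
   just reset, those temporaries no longer exist and the tables must be
   emptied so that the next lookup allocates fresh ones.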
*/ 1920 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1921 if (s->const_table[i]) { 1922 g_hash_table_remove_all(s->const_table[i]); 1923 } 1924 } 1925 1926 s->nb_ops = 0; 1927 s->nb_labels = 0; 1928 s->current_frame_offset = s->frame_start; 1929 1930 #ifdef CONFIG_DEBUG_TCG 1931 s->goto_tb_issue_mask = 0; 1932 #endif 1933 1934 QTAILQ_INIT(&s->ops); 1935 QTAILQ_INIT(&s->free_ops); 1936 s->emit_before_op = NULL; 1937 QSIMPLEQ_INIT(&s->labels); 1938 1939 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1940 tcg_debug_assert(s->insn_start_words > 0); 1941 } 1942 1943 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1944 { 1945 int n = s->nb_temps++; 1946 1947 if (n >= TCG_MAX_TEMPS) { 1948 tcg_raise_tb_overflow(s); 1949 } 1950 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1951 } 1952 1953 static TCGTemp *tcg_global_alloc(TCGContext *s) 1954 { 1955 TCGTemp *ts; 1956 1957 tcg_debug_assert(s->nb_globals == s->nb_temps); 1958 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1959 s->nb_globals++; 1960 ts = tcg_temp_alloc(s); 1961 ts->kind = TEMP_GLOBAL; 1962 1963 return ts; 1964 } 1965 1966 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1967 TCGReg reg, const char *name) 1968 { 1969 TCGTemp *ts; 1970 1971 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1972 1973 ts = tcg_global_alloc(s); 1974 ts->base_type = type; 1975 ts->type = type; 1976 ts->kind = TEMP_FIXED; 1977 ts->reg = reg; 1978 ts->name = name; 1979 tcg_regset_set_reg(s->reserved_regs, reg); 1980 1981 return ts; 1982 } 1983 1984 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1985 { 1986 s->frame_start = start; 1987 s->frame_end = start + size; 1988 s->frame_temp 1989 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1990 } 1991 1992 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1993 const char *name, TCGType type) 1994 { 1995 TCGContext *s = tcg_ctx; 1996 TCGTemp *base_ts = tcgv_ptr_temp(base); 1997 TCGTemp *ts = tcg_global_alloc(s); 1998 int indirect_reg = 0; 1999 2000 switch (base_ts->kind) { 2001 case TEMP_FIXED: 2002 break; 2003 case TEMP_GLOBAL: 2004 /* We do not support double-indirect registers. */ 2005 tcg_debug_assert(!base_ts->indirect_reg); 2006 base_ts->indirect_base = 1; 2007 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 2008 ? 
2 : 1); 2009 indirect_reg = 1; 2010 break; 2011 default: 2012 g_assert_not_reached(); 2013 } 2014 2015 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2016 TCGTemp *ts2 = tcg_global_alloc(s); 2017 char buf[64]; 2018 2019 ts->base_type = TCG_TYPE_I64; 2020 ts->type = TCG_TYPE_I32; 2021 ts->indirect_reg = indirect_reg; 2022 ts->mem_allocated = 1; 2023 ts->mem_base = base_ts; 2024 ts->mem_offset = offset; 2025 pstrcpy(buf, sizeof(buf), name); 2026 pstrcat(buf, sizeof(buf), "_0"); 2027 ts->name = strdup(buf); 2028 2029 tcg_debug_assert(ts2 == ts + 1); 2030 ts2->base_type = TCG_TYPE_I64; 2031 ts2->type = TCG_TYPE_I32; 2032 ts2->indirect_reg = indirect_reg; 2033 ts2->mem_allocated = 1; 2034 ts2->mem_base = base_ts; 2035 ts2->mem_offset = offset + 4; 2036 ts2->temp_subindex = 1; 2037 pstrcpy(buf, sizeof(buf), name); 2038 pstrcat(buf, sizeof(buf), "_1"); 2039 ts2->name = strdup(buf); 2040 } else { 2041 ts->base_type = type; 2042 ts->type = type; 2043 ts->indirect_reg = indirect_reg; 2044 ts->mem_allocated = 1; 2045 ts->mem_base = base_ts; 2046 ts->mem_offset = offset; 2047 ts->name = name; 2048 } 2049 return ts; 2050 } 2051 2052 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 2053 { 2054 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 2055 return temp_tcgv_i32(ts); 2056 } 2057 2058 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 2059 { 2060 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 2061 return temp_tcgv_i64(ts); 2062 } 2063 2064 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 2065 { 2066 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 2067 return temp_tcgv_ptr(ts); 2068 } 2069 2070 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 2071 { 2072 TCGContext *s = tcg_ctx; 2073 TCGTemp *ts; 2074 int n; 2075 2076 if (kind == TEMP_EBB) { 2077 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 2078 2079 if (idx < TCG_MAX_TEMPS) { 2080 /* There is already an available temp with the right type. 
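   Freed TEMP_EBB temps are recorded per base type in s->free_temps by
   tcg_temp_free_internal() below; reusing that slot avoids growing
   nb_temps within the translation block.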
*/ 2081 clear_bit(idx, s->free_temps[type].l); 2082 2083 ts = &s->temps[idx]; 2084 ts->temp_allocated = 1; 2085 tcg_debug_assert(ts->base_type == type); 2086 tcg_debug_assert(ts->kind == kind); 2087 return ts; 2088 } 2089 } else { 2090 tcg_debug_assert(kind == TEMP_TB); 2091 } 2092 2093 switch (type) { 2094 case TCG_TYPE_I32: 2095 case TCG_TYPE_V64: 2096 case TCG_TYPE_V128: 2097 case TCG_TYPE_V256: 2098 n = 1; 2099 break; 2100 case TCG_TYPE_I64: 2101 n = 64 / TCG_TARGET_REG_BITS; 2102 break; 2103 case TCG_TYPE_I128: 2104 n = 128 / TCG_TARGET_REG_BITS; 2105 break; 2106 default: 2107 g_assert_not_reached(); 2108 } 2109 2110 ts = tcg_temp_alloc(s); 2111 ts->base_type = type; 2112 ts->temp_allocated = 1; 2113 ts->kind = kind; 2114 2115 if (n == 1) { 2116 ts->type = type; 2117 } else { 2118 ts->type = TCG_TYPE_REG; 2119 2120 for (int i = 1; i < n; ++i) { 2121 TCGTemp *ts2 = tcg_temp_alloc(s); 2122 2123 tcg_debug_assert(ts2 == ts + i); 2124 ts2->base_type = type; 2125 ts2->type = TCG_TYPE_REG; 2126 ts2->temp_allocated = 1; 2127 ts2->temp_subindex = i; 2128 ts2->kind = kind; 2129 } 2130 } 2131 return ts; 2132 } 2133 2134 TCGv_i32 tcg_temp_new_i32(void) 2135 { 2136 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 2137 } 2138 2139 TCGv_i32 tcg_temp_ebb_new_i32(void) 2140 { 2141 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 2142 } 2143 2144 TCGv_i64 tcg_temp_new_i64(void) 2145 { 2146 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 2147 } 2148 2149 TCGv_i64 tcg_temp_ebb_new_i64(void) 2150 { 2151 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 2152 } 2153 2154 TCGv_ptr tcg_temp_new_ptr(void) 2155 { 2156 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2157 } 2158 2159 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2160 { 2161 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2162 } 2163 2164 TCGv_i128 tcg_temp_new_i128(void) 2165 { 2166 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2167 } 2168 2169 TCGv_i128 tcg_temp_ebb_new_i128(void) 2170 { 2171 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2172 } 2173 2174 TCGv_vec tcg_temp_new_vec(TCGType type) 2175 { 2176 TCGTemp *t; 2177 2178 #ifdef CONFIG_DEBUG_TCG 2179 switch (type) { 2180 case TCG_TYPE_V64: 2181 assert(TCG_TARGET_HAS_v64); 2182 break; 2183 case TCG_TYPE_V128: 2184 assert(TCG_TARGET_HAS_v128); 2185 break; 2186 case TCG_TYPE_V256: 2187 assert(TCG_TARGET_HAS_v256); 2188 break; 2189 default: 2190 g_assert_not_reached(); 2191 } 2192 #endif 2193 2194 t = tcg_temp_new_internal(type, TEMP_EBB); 2195 return temp_tcgv_vec(t); 2196 } 2197 2198 /* Create a new temp of the same type as an existing temp. */ 2199 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2200 { 2201 TCGTemp *t = tcgv_vec_temp(match); 2202 2203 tcg_debug_assert(t->temp_allocated != 0); 2204 2205 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2206 return temp_tcgv_vec(t); 2207 } 2208 2209 void tcg_temp_free_internal(TCGTemp *ts) 2210 { 2211 TCGContext *s = tcg_ctx; 2212 2213 switch (ts->kind) { 2214 case TEMP_CONST: 2215 case TEMP_TB: 2216 /* Silently ignore free. */ 2217 break; 2218 case TEMP_EBB: 2219 tcg_debug_assert(ts->temp_allocated != 0); 2220 ts->temp_allocated = 0; 2221 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2222 break; 2223 default: 2224 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
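   Fixed and global temporaries live for the lifetime of the TCGContext,
   so reaching this case indicates a bug in the caller; TEMP_CONST and
   TEMP_TB above are silently ignored instead because they are reclaimed
   wholesale when the next translation starts.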
*/ 2225 g_assert_not_reached(); 2226 } 2227 } 2228 2229 void tcg_temp_free_i32(TCGv_i32 arg) 2230 { 2231 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2232 } 2233 2234 void tcg_temp_free_i64(TCGv_i64 arg) 2235 { 2236 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2237 } 2238 2239 void tcg_temp_free_i128(TCGv_i128 arg) 2240 { 2241 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2242 } 2243 2244 void tcg_temp_free_ptr(TCGv_ptr arg) 2245 { 2246 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2247 } 2248 2249 void tcg_temp_free_vec(TCGv_vec arg) 2250 { 2251 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2252 } 2253 2254 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2255 { 2256 TCGContext *s = tcg_ctx; 2257 GHashTable *h = s->const_table[type]; 2258 TCGTemp *ts; 2259 2260 if (h == NULL) { 2261 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2262 s->const_table[type] = h; 2263 } 2264 2265 ts = g_hash_table_lookup(h, &val); 2266 if (ts == NULL) { 2267 int64_t *val_ptr; 2268 2269 ts = tcg_temp_alloc(s); 2270 2271 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2272 TCGTemp *ts2 = tcg_temp_alloc(s); 2273 2274 tcg_debug_assert(ts2 == ts + 1); 2275 2276 ts->base_type = TCG_TYPE_I64; 2277 ts->type = TCG_TYPE_I32; 2278 ts->kind = TEMP_CONST; 2279 ts->temp_allocated = 1; 2280 2281 ts2->base_type = TCG_TYPE_I64; 2282 ts2->type = TCG_TYPE_I32; 2283 ts2->kind = TEMP_CONST; 2284 ts2->temp_allocated = 1; 2285 ts2->temp_subindex = 1; 2286 2287 /* 2288 * Retain the full value of the 64-bit constant in the low 2289 * part, so that the hash table works. Actual uses will 2290 * truncate the value to the low part. 2291 */ 2292 ts[HOST_BIG_ENDIAN].val = val; 2293 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2294 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2295 } else { 2296 ts->base_type = type; 2297 ts->type = type; 2298 ts->kind = TEMP_CONST; 2299 ts->temp_allocated = 1; 2300 ts->val = val; 2301 val_ptr = &ts->val; 2302 } 2303 g_hash_table_insert(h, val_ptr, ts); 2304 } 2305 2306 return ts; 2307 } 2308 2309 TCGv_i32 tcg_constant_i32(int32_t val) 2310 { 2311 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2312 } 2313 2314 TCGv_i64 tcg_constant_i64(int64_t val) 2315 { 2316 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2317 } 2318 2319 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2320 { 2321 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2322 } 2323 2324 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2325 { 2326 val = dup_const(vece, val); 2327 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2328 } 2329 2330 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2331 { 2332 TCGTemp *t = tcgv_vec_temp(match); 2333 2334 tcg_debug_assert(t->temp_allocated != 0); 2335 return tcg_constant_vec(t->base_type, vece, val); 2336 } 2337 2338 #ifdef CONFIG_DEBUG_TCG 2339 size_t temp_idx(TCGTemp *ts) 2340 { 2341 ptrdiff_t n = ts - tcg_ctx->temps; 2342 assert(n >= 0 && n < tcg_ctx->nb_temps); 2343 return n; 2344 } 2345 2346 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2347 { 2348 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2349 2350 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2351 assert(o % sizeof(TCGTemp) == 0); 2352 2353 return (void *)tcg_ctx + (uintptr_t)v; 2354 } 2355 #endif /* CONFIG_DEBUG_TCG */ 2356 2357 /* 2358 * Return true if OP may appear in the opcode stream with TYPE. 2359 * Test the runtime variable that controls each opcode. 
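 *
 * For example, tcg_op_supported(INDEX_op_mul_vec, TCG_TYPE_V128, 0)
 * is true only when the backend provides 128-bit vectors and also
 * advertises TCG_TARGET_HAS_mul_vec.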
2360 */ 2361 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2362 { 2363 bool has_type; 2364 2365 switch (type) { 2366 case TCG_TYPE_I32: 2367 has_type = true; 2368 break; 2369 case TCG_TYPE_I64: 2370 has_type = TCG_TARGET_REG_BITS == 64; 2371 break; 2372 case TCG_TYPE_V64: 2373 has_type = TCG_TARGET_HAS_v64; 2374 break; 2375 case TCG_TYPE_V128: 2376 has_type = TCG_TARGET_HAS_v128; 2377 break; 2378 case TCG_TYPE_V256: 2379 has_type = TCG_TARGET_HAS_v256; 2380 break; 2381 default: 2382 has_type = false; 2383 break; 2384 } 2385 2386 switch (op) { 2387 case INDEX_op_discard: 2388 case INDEX_op_set_label: 2389 case INDEX_op_call: 2390 case INDEX_op_br: 2391 case INDEX_op_mb: 2392 case INDEX_op_insn_start: 2393 case INDEX_op_exit_tb: 2394 case INDEX_op_goto_tb: 2395 case INDEX_op_goto_ptr: 2396 case INDEX_op_qemu_ld_i32: 2397 case INDEX_op_qemu_st_i32: 2398 case INDEX_op_qemu_ld_i64: 2399 case INDEX_op_qemu_st_i64: 2400 return true; 2401 2402 case INDEX_op_qemu_st8_i32: 2403 return TCG_TARGET_HAS_qemu_st8_i32; 2404 2405 case INDEX_op_qemu_ld_i128: 2406 case INDEX_op_qemu_st_i128: 2407 return TCG_TARGET_HAS_qemu_ldst_i128; 2408 2409 case INDEX_op_add: 2410 case INDEX_op_and: 2411 case INDEX_op_brcond: 2412 case INDEX_op_deposit: 2413 case INDEX_op_extract: 2414 case INDEX_op_mov: 2415 case INDEX_op_movcond: 2416 case INDEX_op_negsetcond: 2417 case INDEX_op_or: 2418 case INDEX_op_setcond: 2419 case INDEX_op_sextract: 2420 case INDEX_op_xor: 2421 return has_type; 2422 2423 case INDEX_op_ld8u_i32: 2424 case INDEX_op_ld8s_i32: 2425 case INDEX_op_ld16u_i32: 2426 case INDEX_op_ld16s_i32: 2427 case INDEX_op_ld_i32: 2428 case INDEX_op_st8_i32: 2429 case INDEX_op_st16_i32: 2430 case INDEX_op_st_i32: 2431 return true; 2432 2433 case INDEX_op_brcond2_i32: 2434 case INDEX_op_setcond2_i32: 2435 return TCG_TARGET_REG_BITS == 32; 2436 2437 case INDEX_op_ld8u_i64: 2438 case INDEX_op_ld8s_i64: 2439 case INDEX_op_ld16u_i64: 2440 case INDEX_op_ld16s_i64: 2441 case INDEX_op_ld32u_i64: 2442 case INDEX_op_ld32s_i64: 2443 case INDEX_op_ld_i64: 2444 case INDEX_op_st8_i64: 2445 case INDEX_op_st16_i64: 2446 case INDEX_op_st32_i64: 2447 case INDEX_op_st_i64: 2448 case INDEX_op_ext_i32_i64: 2449 case INDEX_op_extu_i32_i64: 2450 case INDEX_op_extrl_i64_i32: 2451 case INDEX_op_extrh_i64_i32: 2452 return TCG_TARGET_REG_BITS == 64; 2453 2454 case INDEX_op_mov_vec: 2455 case INDEX_op_dup_vec: 2456 case INDEX_op_dupm_vec: 2457 case INDEX_op_ld_vec: 2458 case INDEX_op_st_vec: 2459 case INDEX_op_add_vec: 2460 case INDEX_op_sub_vec: 2461 case INDEX_op_and_vec: 2462 case INDEX_op_or_vec: 2463 case INDEX_op_xor_vec: 2464 case INDEX_op_cmp_vec: 2465 return has_type; 2466 case INDEX_op_dup2_vec: 2467 return has_type && TCG_TARGET_REG_BITS == 32; 2468 case INDEX_op_not_vec: 2469 return has_type && TCG_TARGET_HAS_not_vec; 2470 case INDEX_op_neg_vec: 2471 return has_type && TCG_TARGET_HAS_neg_vec; 2472 case INDEX_op_abs_vec: 2473 return has_type && TCG_TARGET_HAS_abs_vec; 2474 case INDEX_op_andc_vec: 2475 return has_type && TCG_TARGET_HAS_andc_vec; 2476 case INDEX_op_orc_vec: 2477 return has_type && TCG_TARGET_HAS_orc_vec; 2478 case INDEX_op_nand_vec: 2479 return has_type && TCG_TARGET_HAS_nand_vec; 2480 case INDEX_op_nor_vec: 2481 return has_type && TCG_TARGET_HAS_nor_vec; 2482 case INDEX_op_eqv_vec: 2483 return has_type && TCG_TARGET_HAS_eqv_vec; 2484 case INDEX_op_mul_vec: 2485 return has_type && TCG_TARGET_HAS_mul_vec; 2486 case INDEX_op_shli_vec: 2487 case INDEX_op_shri_vec: 2488 case INDEX_op_sari_vec: 2489 return 
has_type && TCG_TARGET_HAS_shi_vec; 2490 case INDEX_op_shls_vec: 2491 case INDEX_op_shrs_vec: 2492 case INDEX_op_sars_vec: 2493 return has_type && TCG_TARGET_HAS_shs_vec; 2494 case INDEX_op_shlv_vec: 2495 case INDEX_op_shrv_vec: 2496 case INDEX_op_sarv_vec: 2497 return has_type && TCG_TARGET_HAS_shv_vec; 2498 case INDEX_op_rotli_vec: 2499 return has_type && TCG_TARGET_HAS_roti_vec; 2500 case INDEX_op_rotls_vec: 2501 return has_type && TCG_TARGET_HAS_rots_vec; 2502 case INDEX_op_rotlv_vec: 2503 case INDEX_op_rotrv_vec: 2504 return has_type && TCG_TARGET_HAS_rotv_vec; 2505 case INDEX_op_ssadd_vec: 2506 case INDEX_op_usadd_vec: 2507 case INDEX_op_sssub_vec: 2508 case INDEX_op_ussub_vec: 2509 return has_type && TCG_TARGET_HAS_sat_vec; 2510 case INDEX_op_smin_vec: 2511 case INDEX_op_umin_vec: 2512 case INDEX_op_smax_vec: 2513 case INDEX_op_umax_vec: 2514 return has_type && TCG_TARGET_HAS_minmax_vec; 2515 case INDEX_op_bitsel_vec: 2516 return has_type && TCG_TARGET_HAS_bitsel_vec; 2517 case INDEX_op_cmpsel_vec: 2518 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2519 2520 default: 2521 if (op < INDEX_op_last_generic) { 2522 const TCGOutOp *outop; 2523 TCGConstraintSetIndex con_set; 2524 2525 if (!has_type) { 2526 return false; 2527 } 2528 2529 outop = all_outop[op]; 2530 tcg_debug_assert(outop != NULL); 2531 2532 con_set = outop->static_constraint; 2533 if (con_set == C_Dynamic) { 2534 con_set = outop->dynamic_constraint(type, flags); 2535 } 2536 if (con_set >= 0) { 2537 return true; 2538 } 2539 tcg_debug_assert(con_set == C_NotImplemented); 2540 return false; 2541 } 2542 tcg_debug_assert(op < NB_OPS); 2543 return true; 2544 2545 case INDEX_op_last_generic: 2546 g_assert_not_reached(); 2547 } 2548 } 2549 2550 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2551 { 2552 unsigned width; 2553 2554 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2555 width = (type == TCG_TYPE_I32 ? 
32 : 64); 2556 2557 tcg_debug_assert(ofs < width); 2558 tcg_debug_assert(len > 0); 2559 tcg_debug_assert(len <= width - ofs); 2560 2561 return TCG_TARGET_deposit_valid(type, ofs, len); 2562 } 2563 2564 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2565 2566 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2567 TCGTemp *ret, TCGTemp **args) 2568 { 2569 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2570 int n_extend = 0; 2571 TCGOp *op; 2572 int i, n, pi = 0, total_args; 2573 2574 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2575 init_call_layout(info); 2576 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2577 } 2578 2579 total_args = info->nr_out + info->nr_in + 2; 2580 op = tcg_op_alloc(INDEX_op_call, total_args); 2581 2582 #ifdef CONFIG_PLUGIN 2583 /* Flag helpers that may affect guest state */ 2584 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2585 tcg_ctx->plugin_insn->calls_helpers = true; 2586 } 2587 #endif 2588 2589 TCGOP_CALLO(op) = n = info->nr_out; 2590 switch (n) { 2591 case 0: 2592 tcg_debug_assert(ret == NULL); 2593 break; 2594 case 1: 2595 tcg_debug_assert(ret != NULL); 2596 op->args[pi++] = temp_arg(ret); 2597 break; 2598 case 2: 2599 case 4: 2600 tcg_debug_assert(ret != NULL); 2601 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2602 tcg_debug_assert(ret->temp_subindex == 0); 2603 for (i = 0; i < n; ++i) { 2604 op->args[pi++] = temp_arg(ret + i); 2605 } 2606 break; 2607 default: 2608 g_assert_not_reached(); 2609 } 2610 2611 TCGOP_CALLI(op) = n = info->nr_in; 2612 for (i = 0; i < n; i++) { 2613 const TCGCallArgumentLoc *loc = &info->in[i]; 2614 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2615 2616 switch (loc->kind) { 2617 case TCG_CALL_ARG_NORMAL: 2618 case TCG_CALL_ARG_BY_REF: 2619 case TCG_CALL_ARG_BY_REF_N: 2620 op->args[pi++] = temp_arg(ts); 2621 break; 2622 2623 case TCG_CALL_ARG_EXTEND_U: 2624 case TCG_CALL_ARG_EXTEND_S: 2625 { 2626 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2627 TCGv_i32 orig = temp_tcgv_i32(ts); 2628 2629 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2630 tcg_gen_ext_i32_i64(temp, orig); 2631 } else { 2632 tcg_gen_extu_i32_i64(temp, orig); 2633 } 2634 op->args[pi++] = tcgv_i64_arg(temp); 2635 extend_free[n_extend++] = temp; 2636 } 2637 break; 2638 2639 default: 2640 g_assert_not_reached(); 2641 } 2642 } 2643 op->args[pi++] = (uintptr_t)func; 2644 op->args[pi++] = (uintptr_t)info; 2645 tcg_debug_assert(pi == total_args); 2646 2647 if (tcg_ctx->emit_before_op) { 2648 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2649 } else { 2650 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2651 } 2652 2653 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2654 for (i = 0; i < n_extend; ++i) { 2655 tcg_temp_free_i64(extend_free[i]); 2656 } 2657 } 2658 2659 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2660 { 2661 tcg_gen_callN(func, info, ret, NULL); 2662 } 2663 2664 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2665 { 2666 tcg_gen_callN(func, info, ret, &t1); 2667 } 2668 2669 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2670 TCGTemp *t1, TCGTemp *t2) 2671 { 2672 TCGTemp *args[2] = { t1, t2 }; 2673 tcg_gen_callN(func, info, ret, args); 2674 } 2675 2676 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2677 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2678 { 2679 TCGTemp *args[3] = { t1, t2, t3 }; 2680 tcg_gen_callN(func, info, ret, args); 2681 } 2682 2683 void tcg_gen_call4(void 
*func, TCGHelperInfo *info, TCGTemp *ret, 2684 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2685 { 2686 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2687 tcg_gen_callN(func, info, ret, args); 2688 } 2689 2690 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2691 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2692 { 2693 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2694 tcg_gen_callN(func, info, ret, args); 2695 } 2696 2697 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2698 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2699 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2700 { 2701 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2702 tcg_gen_callN(func, info, ret, args); 2703 } 2704 2705 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2706 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2707 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2708 { 2709 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2710 tcg_gen_callN(func, info, ret, args); 2711 } 2712 2713 static void tcg_reg_alloc_start(TCGContext *s) 2714 { 2715 int i, n; 2716 2717 for (i = 0, n = s->nb_temps; i < n; i++) { 2718 TCGTemp *ts = &s->temps[i]; 2719 TCGTempVal val = TEMP_VAL_MEM; 2720 2721 switch (ts->kind) { 2722 case TEMP_CONST: 2723 val = TEMP_VAL_CONST; 2724 break; 2725 case TEMP_FIXED: 2726 val = TEMP_VAL_REG; 2727 break; 2728 case TEMP_GLOBAL: 2729 break; 2730 case TEMP_EBB: 2731 val = TEMP_VAL_DEAD; 2732 /* fall through */ 2733 case TEMP_TB: 2734 ts->mem_allocated = 0; 2735 break; 2736 default: 2737 g_assert_not_reached(); 2738 } 2739 ts->val_type = val; 2740 } 2741 2742 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2743 } 2744 2745 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2746 TCGTemp *ts) 2747 { 2748 int idx = temp_idx(ts); 2749 2750 switch (ts->kind) { 2751 case TEMP_FIXED: 2752 case TEMP_GLOBAL: 2753 pstrcpy(buf, buf_size, ts->name); 2754 break; 2755 case TEMP_TB: 2756 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2757 break; 2758 case TEMP_EBB: 2759 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2760 break; 2761 case TEMP_CONST: 2762 switch (ts->type) { 2763 case TCG_TYPE_I32: 2764 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2765 break; 2766 #if TCG_TARGET_REG_BITS > 32 2767 case TCG_TYPE_I64: 2768 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2769 break; 2770 #endif 2771 case TCG_TYPE_V64: 2772 case TCG_TYPE_V128: 2773 case TCG_TYPE_V256: 2774 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2775 64 << (ts->type - TCG_TYPE_V64), ts->val); 2776 break; 2777 default: 2778 g_assert_not_reached(); 2779 } 2780 break; 2781 } 2782 return buf; 2783 } 2784 2785 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2786 int buf_size, TCGArg arg) 2787 { 2788 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2789 } 2790 2791 static const char * const cond_name[] = 2792 { 2793 [TCG_COND_NEVER] = "never", 2794 [TCG_COND_ALWAYS] = "always", 2795 [TCG_COND_EQ] = "eq", 2796 [TCG_COND_NE] = "ne", 2797 [TCG_COND_LT] = "lt", 2798 [TCG_COND_GE] = "ge", 2799 [TCG_COND_LE] = "le", 2800 [TCG_COND_GT] = "gt", 2801 [TCG_COND_LTU] = "ltu", 2802 [TCG_COND_GEU] = "geu", 2803 [TCG_COND_LEU] = "leu", 2804 [TCG_COND_GTU] = "gtu", 2805 [TCG_COND_TSTEQ] = "tsteq", 2806 [TCG_COND_TSTNE] = "tstne", 2807 }; 2808 2809 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2810 { 2811 [MO_UB] = "ub", 2812 [MO_SB] = "sb", 2813 [MO_LEUW] = "leuw", 2814 [MO_LESW] = "lesw", 2815 [MO_LEUL] = "leul", 2816 [MO_LESL] = "lesl", 
2817 [MO_LEUQ] = "leq", 2818 [MO_BEUW] = "beuw", 2819 [MO_BESW] = "besw", 2820 [MO_BEUL] = "beul", 2821 [MO_BESL] = "besl", 2822 [MO_BEUQ] = "beq", 2823 [MO_128 + MO_BE] = "beo", 2824 [MO_128 + MO_LE] = "leo", 2825 }; 2826 2827 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2828 [MO_UNALN >> MO_ASHIFT] = "un+", 2829 [MO_ALIGN >> MO_ASHIFT] = "al+", 2830 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2831 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2832 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2833 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2834 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2835 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2836 }; 2837 2838 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2839 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2840 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2841 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2842 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2843 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2844 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2845 }; 2846 2847 static const char bswap_flag_name[][6] = { 2848 [TCG_BSWAP_IZ] = "iz", 2849 [TCG_BSWAP_OZ] = "oz", 2850 [TCG_BSWAP_OS] = "os", 2851 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2852 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2853 }; 2854 2855 #ifdef CONFIG_PLUGIN 2856 static const char * const plugin_from_name[] = { 2857 "from-tb", 2858 "from-insn", 2859 "after-insn", 2860 "after-tb", 2861 }; 2862 #endif 2863 2864 static inline bool tcg_regset_single(TCGRegSet d) 2865 { 2866 return (d & (d - 1)) == 0; 2867 } 2868 2869 static inline TCGReg tcg_regset_first(TCGRegSet d) 2870 { 2871 if (TCG_TARGET_NB_REGS <= 32) { 2872 return ctz32(d); 2873 } else { 2874 return ctz64(d); 2875 } 2876 } 2877 2878 /* Return only the number of characters output -- no error return. */ 2879 #define ne_fprintf(...) \ 2880 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2881 2882 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2883 { 2884 char buf[128]; 2885 TCGOp *op; 2886 2887 QTAILQ_FOREACH(op, &s->ops, link) { 2888 int i, k, nb_oargs, nb_iargs, nb_cargs; 2889 const TCGOpDef *def; 2890 TCGOpcode c; 2891 int col = 0; 2892 2893 c = op->opc; 2894 def = &tcg_op_defs[c]; 2895 2896 if (c == INDEX_op_insn_start) { 2897 nb_oargs = 0; 2898 col += ne_fprintf(f, "\n ----"); 2899 2900 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2901 col += ne_fprintf(f, " %016" PRIx64, 2902 tcg_get_insn_start_param(op, i)); 2903 } 2904 } else if (c == INDEX_op_call) { 2905 const TCGHelperInfo *info = tcg_call_info(op); 2906 void *func = tcg_call_func(op); 2907 2908 /* variable number of arguments */ 2909 nb_oargs = TCGOP_CALLO(op); 2910 nb_iargs = TCGOP_CALLI(op); 2911 nb_cargs = def->nb_cargs; 2912 2913 col += ne_fprintf(f, " %s ", def->name); 2914 2915 /* 2916 * Print the function name from TCGHelperInfo, if available. 2917 * Note that plugins have a template function for the info, 2918 * but the actual function pointer comes from the plugin. 
2919 */ 2920 if (func == info->func) { 2921 col += ne_fprintf(f, "%s", info->name); 2922 } else { 2923 col += ne_fprintf(f, "plugin(%p)", func); 2924 } 2925 2926 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2927 for (i = 0; i < nb_oargs; i++) { 2928 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2929 op->args[i])); 2930 } 2931 for (i = 0; i < nb_iargs; i++) { 2932 TCGArg arg = op->args[nb_oargs + i]; 2933 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2934 col += ne_fprintf(f, ",%s", t); 2935 } 2936 } else { 2937 if (def->flags & TCG_OPF_INT) { 2938 col += ne_fprintf(f, " %s_i%d ", 2939 def->name, 2940 8 * tcg_type_size(TCGOP_TYPE(op))); 2941 } else if (def->flags & TCG_OPF_VECTOR) { 2942 col += ne_fprintf(f, "%s v%d,e%d,", 2943 def->name, 2944 8 * tcg_type_size(TCGOP_TYPE(op)), 2945 8 << TCGOP_VECE(op)); 2946 } else { 2947 col += ne_fprintf(f, " %s ", def->name); 2948 } 2949 2950 nb_oargs = def->nb_oargs; 2951 nb_iargs = def->nb_iargs; 2952 nb_cargs = def->nb_cargs; 2953 2954 k = 0; 2955 for (i = 0; i < nb_oargs; i++) { 2956 const char *sep = k ? "," : ""; 2957 col += ne_fprintf(f, "%s%s", sep, 2958 tcg_get_arg_str(s, buf, sizeof(buf), 2959 op->args[k++])); 2960 } 2961 for (i = 0; i < nb_iargs; i++) { 2962 const char *sep = k ? "," : ""; 2963 col += ne_fprintf(f, "%s%s", sep, 2964 tcg_get_arg_str(s, buf, sizeof(buf), 2965 op->args[k++])); 2966 } 2967 switch (c) { 2968 case INDEX_op_brcond: 2969 case INDEX_op_setcond: 2970 case INDEX_op_negsetcond: 2971 case INDEX_op_movcond: 2972 case INDEX_op_brcond2_i32: 2973 case INDEX_op_setcond2_i32: 2974 case INDEX_op_cmp_vec: 2975 case INDEX_op_cmpsel_vec: 2976 if (op->args[k] < ARRAY_SIZE(cond_name) 2977 && cond_name[op->args[k]]) { 2978 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2979 } else { 2980 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2981 } 2982 i = 1; 2983 break; 2984 case INDEX_op_qemu_ld_i32: 2985 case INDEX_op_qemu_st_i32: 2986 case INDEX_op_qemu_st8_i32: 2987 case INDEX_op_qemu_ld_i64: 2988 case INDEX_op_qemu_st_i64: 2989 case INDEX_op_qemu_ld_i128: 2990 case INDEX_op_qemu_st_i128: 2991 { 2992 const char *s_al, *s_op, *s_at; 2993 MemOpIdx oi = op->args[k++]; 2994 MemOp mop = get_memop(oi); 2995 unsigned ix = get_mmuidx(oi); 2996 2997 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2998 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2999 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 3000 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 3001 3002 /* If all fields are accounted for, print symbolically. 
*/ 3003 if (!mop && s_al && s_op && s_at) { 3004 col += ne_fprintf(f, ",%s%s%s,%u", 3005 s_at, s_al, s_op, ix); 3006 } else { 3007 mop = get_memop(oi); 3008 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 3009 } 3010 i = 1; 3011 } 3012 break; 3013 case INDEX_op_bswap16: 3014 case INDEX_op_bswap32: 3015 case INDEX_op_bswap64: 3016 { 3017 TCGArg flags = op->args[k]; 3018 const char *name = NULL; 3019 3020 if (flags < ARRAY_SIZE(bswap_flag_name)) { 3021 name = bswap_flag_name[flags]; 3022 } 3023 if (name) { 3024 col += ne_fprintf(f, ",%s", name); 3025 } else { 3026 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 3027 } 3028 i = k = 1; 3029 } 3030 break; 3031 #ifdef CONFIG_PLUGIN 3032 case INDEX_op_plugin_cb: 3033 { 3034 TCGArg from = op->args[k++]; 3035 const char *name = NULL; 3036 3037 if (from < ARRAY_SIZE(plugin_from_name)) { 3038 name = plugin_from_name[from]; 3039 } 3040 if (name) { 3041 col += ne_fprintf(f, "%s", name); 3042 } else { 3043 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 3044 } 3045 i = 1; 3046 } 3047 break; 3048 #endif 3049 default: 3050 i = 0; 3051 break; 3052 } 3053 switch (c) { 3054 case INDEX_op_set_label: 3055 case INDEX_op_br: 3056 case INDEX_op_brcond: 3057 case INDEX_op_brcond2_i32: 3058 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 3059 arg_label(op->args[k])->id); 3060 i++, k++; 3061 break; 3062 case INDEX_op_mb: 3063 { 3064 TCGBar membar = op->args[k]; 3065 const char *b_op, *m_op; 3066 3067 switch (membar & TCG_BAR_SC) { 3068 case 0: 3069 b_op = "none"; 3070 break; 3071 case TCG_BAR_LDAQ: 3072 b_op = "acq"; 3073 break; 3074 case TCG_BAR_STRL: 3075 b_op = "rel"; 3076 break; 3077 case TCG_BAR_SC: 3078 b_op = "seq"; 3079 break; 3080 default: 3081 g_assert_not_reached(); 3082 } 3083 3084 switch (membar & TCG_MO_ALL) { 3085 case 0: 3086 m_op = "none"; 3087 break; 3088 case TCG_MO_LD_LD: 3089 m_op = "rr"; 3090 break; 3091 case TCG_MO_LD_ST: 3092 m_op = "rw"; 3093 break; 3094 case TCG_MO_ST_LD: 3095 m_op = "wr"; 3096 break; 3097 case TCG_MO_ST_ST: 3098 m_op = "ww"; 3099 break; 3100 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3101 m_op = "rr+rw"; 3102 break; 3103 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3104 m_op = "rr+wr"; 3105 break; 3106 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3107 m_op = "rr+ww"; 3108 break; 3109 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3110 m_op = "rw+wr"; 3111 break; 3112 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3113 m_op = "rw+ww"; 3114 break; 3115 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3116 m_op = "wr+ww"; 3117 break; 3118 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3119 m_op = "rr+rw+wr"; 3120 break; 3121 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3122 m_op = "rr+rw+ww"; 3123 break; 3124 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3125 m_op = "rr+wr+ww"; 3126 break; 3127 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3128 m_op = "rw+wr+ww"; 3129 break; 3130 case TCG_MO_ALL: 3131 m_op = "all"; 3132 break; 3133 default: 3134 g_assert_not_reached(); 3135 } 3136 3137 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3138 i++, k++; 3139 } 3140 break; 3141 default: 3142 break; 3143 } 3144 for (; i < nb_cargs; i++, k++) { 3145 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3146 op->args[k]); 3147 } 3148 } 3149 3150 if (have_prefs || op->life) { 3151 for (; col < 40; ++col) { 3152 putc(' ', f); 3153 } 3154 } 3155 3156 if (op->life) { 3157 unsigned life = op->life; 3158 3159 if (life & (SYNC_ARG * 3)) { 3160 ne_fprintf(f, " sync:"); 3161 for (i = 0; i < 2; ++i) { 3162 if (life & (SYNC_ARG << i)) { 3163 ne_fprintf(f, " %d", i); 3164 } 3165 } 3166 } 3167 life /= DEAD_ARG; 3168 if (life) { 3169 ne_fprintf(f, " dead:"); 3170 for (i = 0; life; ++i, life >>= 1) { 3171 if (life & 1) { 3172 ne_fprintf(f, " %d", i); 3173 } 3174 } 3175 } 3176 } 3177 3178 if (have_prefs) { 3179 for (i = 0; i < nb_oargs; ++i) { 3180 TCGRegSet set = output_pref(op, i); 3181 3182 if (i == 0) { 3183 ne_fprintf(f, " pref="); 3184 } else { 3185 ne_fprintf(f, ","); 3186 } 3187 if (set == 0) { 3188 ne_fprintf(f, "none"); 3189 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3190 ne_fprintf(f, "all"); 3191 #ifdef CONFIG_DEBUG_TCG 3192 } else if (tcg_regset_single(set)) { 3193 TCGReg reg = tcg_regset_first(set); 3194 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3195 #endif 3196 } else if (TCG_TARGET_NB_REGS <= 32) { 3197 ne_fprintf(f, "0x%x", (uint32_t)set); 3198 } else { 3199 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3200 } 3201 } 3202 } 3203 3204 putc('\n', f); 3205 } 3206 } 3207 3208 /* we give more priority to constraints with less registers */ 3209 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3210 { 3211 int n; 3212 3213 arg_ct += k; 3214 n = ctpop64(arg_ct->regs); 3215 3216 /* 3217 * Sort constraints of a single register first, which includes output 3218 * aliases (which must exactly match the input already allocated). 3219 */ 3220 if (n == 1 || arg_ct->oalias) { 3221 return INT_MAX; 3222 } 3223 3224 /* 3225 * Sort register pairs next, first then second immediately after. 3226 * Arbitrarily sort multiple pairs by the index of the first reg; 3227 * there shouldn't be many pairs. 3228 */ 3229 switch (arg_ct->pair) { 3230 case 1: 3231 case 3: 3232 return (k + 1) * 2; 3233 case 2: 3234 return (arg_ct->pair_index + 1) * 2 - 1; 3235 } 3236 3237 /* Finally, sort by decreasing register count. 
*/ 3238 assert(n > 1); 3239 return -n; 3240 } 3241 3242 /* sort from highest priority to lowest */ 3243 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3244 { 3245 int i, j; 3246 3247 for (i = 0; i < n; i++) { 3248 a[start + i].sort_index = start + i; 3249 } 3250 if (n <= 1) { 3251 return; 3252 } 3253 for (i = 0; i < n - 1; i++) { 3254 for (j = i + 1; j < n; j++) { 3255 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3256 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3257 if (p1 < p2) { 3258 int tmp = a[start + i].sort_index; 3259 a[start + i].sort_index = a[start + j].sort_index; 3260 a[start + j].sort_index = tmp; 3261 } 3262 } 3263 } 3264 } 3265 3266 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3267 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3268 3269 static void process_constraint_sets(void) 3270 { 3271 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3272 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3273 TCGArgConstraint *args_ct = all_cts[c]; 3274 int nb_oargs = tdefs->nb_oargs; 3275 int nb_iargs = tdefs->nb_iargs; 3276 int nb_args = nb_oargs + nb_iargs; 3277 bool saw_alias_pair = false; 3278 3279 for (int i = 0; i < nb_args; i++) { 3280 const char *ct_str = tdefs->args_ct_str[i]; 3281 bool input_p = i >= nb_oargs; 3282 int o; 3283 3284 switch (*ct_str) { 3285 case '0' ... '9': 3286 o = *ct_str - '0'; 3287 tcg_debug_assert(input_p); 3288 tcg_debug_assert(o < nb_oargs); 3289 tcg_debug_assert(args_ct[o].regs != 0); 3290 tcg_debug_assert(!args_ct[o].oalias); 3291 args_ct[i] = args_ct[o]; 3292 /* The output sets oalias. */ 3293 args_ct[o].oalias = 1; 3294 args_ct[o].alias_index = i; 3295 /* The input sets ialias. */ 3296 args_ct[i].ialias = 1; 3297 args_ct[i].alias_index = o; 3298 if (args_ct[i].pair) { 3299 saw_alias_pair = true; 3300 } 3301 tcg_debug_assert(ct_str[1] == '\0'); 3302 continue; 3303 3304 case '&': 3305 tcg_debug_assert(!input_p); 3306 args_ct[i].newreg = true; 3307 ct_str++; 3308 break; 3309 3310 case 'p': /* plus */ 3311 /* Allocate to the register after the previous. */ 3312 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3313 o = i - 1; 3314 tcg_debug_assert(!args_ct[o].pair); 3315 tcg_debug_assert(!args_ct[o].ct); 3316 args_ct[i] = (TCGArgConstraint){ 3317 .pair = 2, 3318 .pair_index = o, 3319 .regs = args_ct[o].regs << 1, 3320 .newreg = args_ct[o].newreg, 3321 }; 3322 args_ct[o].pair = 1; 3323 args_ct[o].pair_index = i; 3324 tcg_debug_assert(ct_str[1] == '\0'); 3325 continue; 3326 3327 case 'm': /* minus */ 3328 /* Allocate to the register before the previous. */ 3329 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3330 o = i - 1; 3331 tcg_debug_assert(!args_ct[o].pair); 3332 tcg_debug_assert(!args_ct[o].ct); 3333 args_ct[i] = (TCGArgConstraint){ 3334 .pair = 1, 3335 .pair_index = o, 3336 .regs = args_ct[o].regs >> 1, 3337 .newreg = args_ct[o].newreg, 3338 }; 3339 args_ct[o].pair = 2; 3340 args_ct[o].pair_index = i; 3341 tcg_debug_assert(ct_str[1] == '\0'); 3342 continue; 3343 } 3344 3345 do { 3346 switch (*ct_str) { 3347 case 'i': 3348 args_ct[i].ct |= TCG_CT_CONST; 3349 break; 3350 #ifdef TCG_REG_ZERO 3351 case 'z': 3352 args_ct[i].ct |= TCG_CT_REG_ZERO; 3353 break; 3354 #endif 3355 3356 /* Include all of the target-specific constraints. 
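   Each single-letter constraint defined in tcg-target-con-str.h expands,
   via the CONST/REGS macros below, into a case that ORs either a
   TCG_CT_* constant mask or a register set into args_ct[i].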
*/ 3357 3358 #undef CONST 3359 #define CONST(CASE, MASK) \ 3360 case CASE: args_ct[i].ct |= MASK; break; 3361 #define REGS(CASE, MASK) \ 3362 case CASE: args_ct[i].regs |= MASK; break; 3363 3364 #include "tcg-target-con-str.h" 3365 3366 #undef REGS 3367 #undef CONST 3368 default: 3369 case '0' ... '9': 3370 case '&': 3371 case 'p': 3372 case 'm': 3373 /* Typo in TCGConstraintSet constraint. */ 3374 g_assert_not_reached(); 3375 } 3376 } while (*++ct_str != '\0'); 3377 } 3378 3379 /* 3380 * Fix up output pairs that are aliased with inputs. 3381 * When we created the alias, we copied pair from the output. 3382 * There are three cases: 3383 * (1a) Pairs of inputs alias pairs of outputs. 3384 * (1b) One input aliases the first of a pair of outputs. 3385 * (2) One input aliases the second of a pair of outputs. 3386 * 3387 * Case 1a is handled by making sure that the pair_index'es are 3388 * properly updated so that they appear the same as a pair of inputs. 3389 * 3390 * Case 1b is handled by setting the pair_index of the input to 3391 * itself, simply so it doesn't point to an unrelated argument. 3392 * Since we don't encounter the "second" during the input allocation 3393 * phase, nothing happens with the second half of the input pair. 3394 * 3395 * Case 2 is handled by setting the second input to pair=3, the 3396 * first output to pair=3, and the pair_index'es to match. 3397 */ 3398 if (saw_alias_pair) { 3399 for (int i = nb_oargs; i < nb_args; i++) { 3400 int o, o2, i2; 3401 3402 /* 3403 * Since [0-9pm] must be alone in the constraint string, 3404 * the only way they can both be set is if the pair comes 3405 * from the output alias. 3406 */ 3407 if (!args_ct[i].ialias) { 3408 continue; 3409 } 3410 switch (args_ct[i].pair) { 3411 case 0: 3412 break; 3413 case 1: 3414 o = args_ct[i].alias_index; 3415 o2 = args_ct[o].pair_index; 3416 tcg_debug_assert(args_ct[o].pair == 1); 3417 tcg_debug_assert(args_ct[o2].pair == 2); 3418 if (args_ct[o2].oalias) { 3419 /* Case 1a */ 3420 i2 = args_ct[o2].alias_index; 3421 tcg_debug_assert(args_ct[i2].pair == 2); 3422 args_ct[i2].pair_index = i; 3423 args_ct[i].pair_index = i2; 3424 } else { 3425 /* Case 1b */ 3426 args_ct[i].pair_index = i; 3427 } 3428 break; 3429 case 2: 3430 o = args_ct[i].alias_index; 3431 o2 = args_ct[o].pair_index; 3432 tcg_debug_assert(args_ct[o].pair == 2); 3433 tcg_debug_assert(args_ct[o2].pair == 1); 3434 if (args_ct[o2].oalias) { 3435 /* Case 1a */ 3436 i2 = args_ct[o2].alias_index; 3437 tcg_debug_assert(args_ct[i2].pair == 1); 3438 args_ct[i2].pair_index = i; 3439 args_ct[i].pair_index = i2; 3440 } else { 3441 /* Case 2 */ 3442 args_ct[i].pair = 3; 3443 args_ct[o2].pair = 3; 3444 args_ct[i].pair_index = o2; 3445 args_ct[o2].pair_index = i; 3446 } 3447 break; 3448 default: 3449 g_assert_not_reached(); 3450 } 3451 } 3452 } 3453 3454 /* sort the constraints (XXX: this is just an heuristic) */ 3455 sort_constraints(args_ct, 0, nb_oargs); 3456 sort_constraints(args_ct, nb_oargs, nb_iargs); 3457 } 3458 } 3459 3460 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3461 { 3462 TCGOpcode opc = op->opc; 3463 TCGType type = TCGOP_TYPE(op); 3464 unsigned flags = TCGOP_FLAGS(op); 3465 const TCGOpDef *def = &tcg_op_defs[opc]; 3466 const TCGOutOp *outop = all_outop[opc]; 3467 TCGConstraintSetIndex con_set; 3468 3469 if (def->flags & TCG_OPF_NOT_PRESENT) { 3470 return empty_cts; 3471 } 3472 3473 if (outop) { 3474 con_set = outop->static_constraint; 3475 if (con_set == C_Dynamic) { 3476 con_set = outop->dynamic_constraint(type, flags); 
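/* As in tcg_op_supported() above, C_Dynamic defers the choice of
   constraint set to the backend, based on the op's type and flags. */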
3477 } 3478 } else { 3479 con_set = tcg_target_op_def(opc, type, flags); 3480 } 3481 tcg_debug_assert(con_set >= 0); 3482 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3483 3484 /* The constraint arguments must match TCGOpcode arguments. */ 3485 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3486 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3487 3488 return all_cts[con_set]; 3489 } 3490 3491 static void remove_label_use(TCGOp *op, int idx) 3492 { 3493 TCGLabel *label = arg_label(op->args[idx]); 3494 TCGLabelUse *use; 3495 3496 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3497 if (use->op == op) { 3498 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3499 return; 3500 } 3501 } 3502 g_assert_not_reached(); 3503 } 3504 3505 void tcg_op_remove(TCGContext *s, TCGOp *op) 3506 { 3507 switch (op->opc) { 3508 case INDEX_op_br: 3509 remove_label_use(op, 0); 3510 break; 3511 case INDEX_op_brcond: 3512 remove_label_use(op, 3); 3513 break; 3514 case INDEX_op_brcond2_i32: 3515 remove_label_use(op, 5); 3516 break; 3517 default: 3518 break; 3519 } 3520 3521 QTAILQ_REMOVE(&s->ops, op, link); 3522 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3523 s->nb_ops--; 3524 } 3525 3526 void tcg_remove_ops_after(TCGOp *op) 3527 { 3528 TCGContext *s = tcg_ctx; 3529 3530 while (true) { 3531 TCGOp *last = tcg_last_op(); 3532 if (last == op) { 3533 return; 3534 } 3535 tcg_op_remove(s, last); 3536 } 3537 } 3538 3539 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3540 { 3541 TCGContext *s = tcg_ctx; 3542 TCGOp *op = NULL; 3543 3544 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3545 QTAILQ_FOREACH(op, &s->free_ops, link) { 3546 if (nargs <= op->nargs) { 3547 QTAILQ_REMOVE(&s->free_ops, op, link); 3548 nargs = op->nargs; 3549 goto found; 3550 } 3551 } 3552 } 3553 3554 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3555 nargs = MAX(4, nargs); 3556 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3557 3558 found: 3559 memset(op, 0, offsetof(TCGOp, link)); 3560 op->opc = opc; 3561 op->nargs = nargs; 3562 3563 /* Check for bitfield overflow. 
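   TCGOp::nargs is a narrow bitfield, so the assignment above can
   silently truncate; the assertion below verifies that the requested
   argument count survived the round trip.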
*/ 3564 tcg_debug_assert(op->nargs == nargs); 3565 3566 s->nb_ops++; 3567 return op; 3568 } 3569 3570 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3571 { 3572 TCGOp *op = tcg_op_alloc(opc, nargs); 3573 3574 if (tcg_ctx->emit_before_op) { 3575 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3576 } else { 3577 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3578 } 3579 return op; 3580 } 3581 3582 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3583 TCGOpcode opc, TCGType type, unsigned nargs) 3584 { 3585 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3586 3587 TCGOP_TYPE(new_op) = type; 3588 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3589 return new_op; 3590 } 3591 3592 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3593 TCGOpcode opc, TCGType type, unsigned nargs) 3594 { 3595 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3596 3597 TCGOP_TYPE(new_op) = type; 3598 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3599 return new_op; 3600 } 3601 3602 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3603 { 3604 TCGLabelUse *u; 3605 3606 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3607 TCGOp *op = u->op; 3608 switch (op->opc) { 3609 case INDEX_op_br: 3610 op->args[0] = label_arg(to); 3611 break; 3612 case INDEX_op_brcond: 3613 op->args[3] = label_arg(to); 3614 break; 3615 case INDEX_op_brcond2_i32: 3616 op->args[5] = label_arg(to); 3617 break; 3618 default: 3619 g_assert_not_reached(); 3620 } 3621 } 3622 3623 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3624 } 3625 3626 /* Reachable analysis : remove unreachable code. */ 3627 static void __attribute__((noinline)) 3628 reachable_code_pass(TCGContext *s) 3629 { 3630 TCGOp *op, *op_next, *op_prev; 3631 bool dead = false; 3632 3633 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3634 bool remove = dead; 3635 TCGLabel *label; 3636 3637 switch (op->opc) { 3638 case INDEX_op_set_label: 3639 label = arg_label(op->args[0]); 3640 3641 /* 3642 * Note that the first op in the TB is always a load, 3643 * so there is always something before a label. 3644 */ 3645 op_prev = QTAILQ_PREV(op, link); 3646 3647 /* 3648 * If we find two sequential labels, move all branches to 3649 * reference the second label and remove the first label. 3650 * Do this before branch to next optimization, so that the 3651 * middle label is out of the way. 3652 */ 3653 if (op_prev->opc == INDEX_op_set_label) { 3654 move_label_uses(label, arg_label(op_prev->args[0])); 3655 tcg_op_remove(s, op_prev); 3656 op_prev = QTAILQ_PREV(op, link); 3657 } 3658 3659 /* 3660 * Optimization can fold conditional branches to unconditional. 3661 * If we find a label which is preceded by an unconditional 3662 * branch to next, remove the branch. We couldn't do this when 3663 * processing the branch because any dead code between the branch 3664 * and label had not yet been removed. 3665 */ 3666 if (op_prev->opc == INDEX_op_br && 3667 label == arg_label(op_prev->args[0])) { 3668 tcg_op_remove(s, op_prev); 3669 /* Fall through means insns become live again. */ 3670 dead = false; 3671 } 3672 3673 if (QSIMPLEQ_EMPTY(&label->branches)) { 3674 /* 3675 * While there is an occasional backward branch, virtually 3676 * all branches generated by the translators are forward. 3677 * Which means that generally we will have already removed 3678 * all references to the label that will be, and there is 3679 * little to be gained by iterating. 3680 */ 3681 remove = true; 3682 } else { 3683 /* Once we see a label, insns become live again. 
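   The label may be the target of a branch from live code elsewhere in
   the TB, so nothing that follows it can be assumed dead.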
*/ 3684 dead = false; 3685 remove = false; 3686 } 3687 break; 3688 3689 case INDEX_op_br: 3690 case INDEX_op_exit_tb: 3691 case INDEX_op_goto_ptr: 3692 /* Unconditional branches; everything following is dead. */ 3693 dead = true; 3694 break; 3695 3696 case INDEX_op_call: 3697 /* Notice noreturn helper calls, raising exceptions. */ 3698 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3699 dead = true; 3700 } 3701 break; 3702 3703 case INDEX_op_insn_start: 3704 /* Never remove -- we need to keep these for unwind. */ 3705 remove = false; 3706 break; 3707 3708 default: 3709 break; 3710 } 3711 3712 if (remove) { 3713 tcg_op_remove(s, op); 3714 } 3715 } 3716 } 3717 3718 #define TS_DEAD 1 3719 #define TS_MEM 2 3720 3721 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3722 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3723 3724 /* For liveness_pass_1, the register preferences for a given temp. */ 3725 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3726 { 3727 return ts->state_ptr; 3728 } 3729 3730 /* For liveness_pass_1, reset the preferences for a given temp to the 3731 * maximal regset for its type. 3732 */ 3733 static inline void la_reset_pref(TCGTemp *ts) 3734 { 3735 *la_temp_pref(ts) 3736 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3737 } 3738 3739 /* liveness analysis: end of function: all temps are dead, and globals 3740 should be in memory. */ 3741 static void la_func_end(TCGContext *s, int ng, int nt) 3742 { 3743 int i; 3744 3745 for (i = 0; i < ng; ++i) { 3746 s->temps[i].state = TS_DEAD | TS_MEM; 3747 la_reset_pref(&s->temps[i]); 3748 } 3749 for (i = ng; i < nt; ++i) { 3750 s->temps[i].state = TS_DEAD; 3751 la_reset_pref(&s->temps[i]); 3752 } 3753 } 3754 3755 /* liveness analysis: end of basic block: all temps are dead, globals 3756 and local temps should be in memory. */ 3757 static void la_bb_end(TCGContext *s, int ng, int nt) 3758 { 3759 int i; 3760 3761 for (i = 0; i < nt; ++i) { 3762 TCGTemp *ts = &s->temps[i]; 3763 int state; 3764 3765 switch (ts->kind) { 3766 case TEMP_FIXED: 3767 case TEMP_GLOBAL: 3768 case TEMP_TB: 3769 state = TS_DEAD | TS_MEM; 3770 break; 3771 case TEMP_EBB: 3772 case TEMP_CONST: 3773 state = TS_DEAD; 3774 break; 3775 default: 3776 g_assert_not_reached(); 3777 } 3778 ts->state = state; 3779 la_reset_pref(ts); 3780 } 3781 } 3782 3783 /* liveness analysis: sync globals back to memory. */ 3784 static void la_global_sync(TCGContext *s, int ng) 3785 { 3786 int i; 3787 3788 for (i = 0; i < ng; ++i) { 3789 int state = s->temps[i].state; 3790 s->temps[i].state = state | TS_MEM; 3791 if (state == TS_DEAD) { 3792 /* If the global was previously dead, reset prefs. */ 3793 la_reset_pref(&s->temps[i]); 3794 } 3795 } 3796 } 3797 3798 /* 3799 * liveness analysis: conditional branch: all temps are dead unless 3800 * explicitly live-across-conditional-branch, globals and local temps 3801 * should be synced. 3802 */ 3803 static void la_bb_sync(TCGContext *s, int ng, int nt) 3804 { 3805 la_global_sync(s, ng); 3806 3807 for (int i = ng; i < nt; ++i) { 3808 TCGTemp *ts = &s->temps[i]; 3809 int state; 3810 3811 switch (ts->kind) { 3812 case TEMP_TB: 3813 state = ts->state; 3814 ts->state = state | TS_MEM; 3815 if (state != TS_DEAD) { 3816 continue; 3817 } 3818 break; 3819 case TEMP_EBB: 3820 case TEMP_CONST: 3821 continue; 3822 default: 3823 g_assert_not_reached(); 3824 } 3825 la_reset_pref(&s->temps[i]); 3826 } 3827 } 3828 3829 /* liveness analysis: sync globals back to memory and kill. 
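   Unlike la_global_sync() above, the globals are also marked dead,
   since a helper that may write them leaves any register copy stale.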
*/ 3830 static void la_global_kill(TCGContext *s, int ng) 3831 { 3832 int i; 3833 3834 for (i = 0; i < ng; i++) { 3835 s->temps[i].state = TS_DEAD | TS_MEM; 3836 la_reset_pref(&s->temps[i]); 3837 } 3838 } 3839 3840 /* liveness analysis: note live globals crossing calls. */ 3841 static void la_cross_call(TCGContext *s, int nt) 3842 { 3843 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3844 int i; 3845 3846 for (i = 0; i < nt; i++) { 3847 TCGTemp *ts = &s->temps[i]; 3848 if (!(ts->state & TS_DEAD)) { 3849 TCGRegSet *pset = la_temp_pref(ts); 3850 TCGRegSet set = *pset; 3851 3852 set &= mask; 3853 /* If the combination is not possible, restart. */ 3854 if (set == 0) { 3855 set = tcg_target_available_regs[ts->type] & mask; 3856 } 3857 *pset = set; 3858 } 3859 } 3860 } 3861 3862 /* 3863 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3864 * to TEMP_EBB, if possible. 3865 */ 3866 static void __attribute__((noinline)) 3867 liveness_pass_0(TCGContext *s) 3868 { 3869 void * const multiple_ebb = (void *)(uintptr_t)-1; 3870 int nb_temps = s->nb_temps; 3871 TCGOp *op, *ebb; 3872 3873 for (int i = s->nb_globals; i < nb_temps; ++i) { 3874 s->temps[i].state_ptr = NULL; 3875 } 3876 3877 /* 3878 * Represent each EBB by the op at which it begins. In the case of 3879 * the first EBB, this is the first op, otherwise it is a label. 3880 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3881 * within a single EBB, else MULTIPLE_EBB. 3882 */ 3883 ebb = QTAILQ_FIRST(&s->ops); 3884 QTAILQ_FOREACH(op, &s->ops, link) { 3885 const TCGOpDef *def; 3886 int nb_oargs, nb_iargs; 3887 3888 switch (op->opc) { 3889 case INDEX_op_set_label: 3890 ebb = op; 3891 continue; 3892 case INDEX_op_discard: 3893 continue; 3894 case INDEX_op_call: 3895 nb_oargs = TCGOP_CALLO(op); 3896 nb_iargs = TCGOP_CALLI(op); 3897 break; 3898 default: 3899 def = &tcg_op_defs[op->opc]; 3900 nb_oargs = def->nb_oargs; 3901 nb_iargs = def->nb_iargs; 3902 break; 3903 } 3904 3905 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3906 TCGTemp *ts = arg_temp(op->args[i]); 3907 3908 if (ts->kind != TEMP_TB) { 3909 continue; 3910 } 3911 if (ts->state_ptr == NULL) { 3912 ts->state_ptr = ebb; 3913 } else if (ts->state_ptr != ebb) { 3914 ts->state_ptr = multiple_ebb; 3915 } 3916 } 3917 } 3918 3919 /* 3920 * For TEMP_TB that turned out not to be used beyond one EBB, 3921 * reduce the liveness to TEMP_EBB. 3922 */ 3923 for (int i = s->nb_globals; i < nb_temps; ++i) { 3924 TCGTemp *ts = &s->temps[i]; 3925 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3926 ts->kind = TEMP_EBB; 3927 } 3928 } 3929 } 3930 3931 static void assert_carry_dead(TCGContext *s) 3932 { 3933 /* 3934 * Carry operations can be separated by a few insns like mov, 3935 * load or store, but they should always be "close", and 3936 * carry-out operations should always be paired with carry-in. 3937 * At various boundaries, carry must have been consumed. 3938 */ 3939 tcg_debug_assert(!s->carry_live); 3940 } 3941 3942 /* Liveness analysis : update the opc_arg_life array to tell if a 3943 given input arguments is dead. Instructions updating dead 3944 temporaries are removed. */ 3945 static void __attribute__((noinline)) 3946 liveness_pass_1(TCGContext *s) 3947 { 3948 int nb_globals = s->nb_globals; 3949 int nb_temps = s->nb_temps; 3950 TCGOp *op, *op_prev; 3951 TCGRegSet *prefs; 3952 3953 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3954 for (int i = 0; i < nb_temps; ++i) { 3955 s->temps[i].state_ptr = prefs + i; 3956 } 3957 3958 /* ??? 
Should be redundant with the exit_tb that ends the TB. */ 3959 la_func_end(s, nb_globals, nb_temps); 3960 3961 s->carry_live = false; 3962 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3963 int nb_iargs, nb_oargs; 3964 TCGOpcode opc_new, opc_new2; 3965 TCGLifeData arg_life = 0; 3966 TCGTemp *ts; 3967 TCGOpcode opc = op->opc; 3968 const TCGOpDef *def; 3969 const TCGArgConstraint *args_ct; 3970 3971 switch (opc) { 3972 case INDEX_op_call: 3973 assert_carry_dead(s); 3974 { 3975 const TCGHelperInfo *info = tcg_call_info(op); 3976 int call_flags = tcg_call_flags(op); 3977 3978 nb_oargs = TCGOP_CALLO(op); 3979 nb_iargs = TCGOP_CALLI(op); 3980 3981 /* pure functions can be removed if their result is unused */ 3982 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3983 for (int i = 0; i < nb_oargs; i++) { 3984 ts = arg_temp(op->args[i]); 3985 if (ts->state != TS_DEAD) { 3986 goto do_not_remove_call; 3987 } 3988 } 3989 goto do_remove; 3990 } 3991 do_not_remove_call: 3992 3993 /* Output args are dead. */ 3994 for (int i = 0; i < nb_oargs; i++) { 3995 ts = arg_temp(op->args[i]); 3996 if (ts->state & TS_DEAD) { 3997 arg_life |= DEAD_ARG << i; 3998 } 3999 if (ts->state & TS_MEM) { 4000 arg_life |= SYNC_ARG << i; 4001 } 4002 ts->state = TS_DEAD; 4003 la_reset_pref(ts); 4004 } 4005 4006 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 4007 memset(op->output_pref, 0, sizeof(op->output_pref)); 4008 4009 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 4010 TCG_CALL_NO_READ_GLOBALS))) { 4011 la_global_kill(s, nb_globals); 4012 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 4013 la_global_sync(s, nb_globals); 4014 } 4015 4016 /* Record arguments that die in this helper. */ 4017 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4018 ts = arg_temp(op->args[i]); 4019 if (ts->state & TS_DEAD) { 4020 arg_life |= DEAD_ARG << i; 4021 } 4022 } 4023 4024 /* For all live registers, remove call-clobbered prefs. */ 4025 la_cross_call(s, nb_temps); 4026 4027 /* 4028 * Input arguments are live for preceding opcodes. 4029 * 4030 * For those arguments that die, and will be allocated in 4031 * registers, clear the register set for that arg, to be 4032 * filled in below. For args that will be on the stack, 4033 * reset to any available reg. Process arguments in reverse 4034 * order so that if a temp is used more than once, the stack 4035 * reset to max happens before the register reset to 0. 4036 */ 4037 for (int i = nb_iargs - 1; i >= 0; i--) { 4038 const TCGCallArgumentLoc *loc = &info->in[i]; 4039 ts = arg_temp(op->args[nb_oargs + i]); 4040 4041 if (ts->state & TS_DEAD) { 4042 switch (loc->kind) { 4043 case TCG_CALL_ARG_NORMAL: 4044 case TCG_CALL_ARG_EXTEND_U: 4045 case TCG_CALL_ARG_EXTEND_S: 4046 if (arg_slot_reg_p(loc->arg_slot)) { 4047 *la_temp_pref(ts) = 0; 4048 break; 4049 } 4050 /* fall through */ 4051 default: 4052 *la_temp_pref(ts) = 4053 tcg_target_available_regs[ts->type]; 4054 break; 4055 } 4056 ts->state &= ~TS_DEAD; 4057 } 4058 } 4059 4060 /* 4061 * For each input argument, add its input register to prefs. 4062 * If a temp is used once, this produces a single set bit; 4063 * if a temp is used multiple times, this produces a set. 
4064 */ 4065 for (int i = 0; i < nb_iargs; i++) { 4066 const TCGCallArgumentLoc *loc = &info->in[i]; 4067 ts = arg_temp(op->args[nb_oargs + i]); 4068 4069 switch (loc->kind) { 4070 case TCG_CALL_ARG_NORMAL: 4071 case TCG_CALL_ARG_EXTEND_U: 4072 case TCG_CALL_ARG_EXTEND_S: 4073 if (arg_slot_reg_p(loc->arg_slot)) { 4074 tcg_regset_set_reg(*la_temp_pref(ts), 4075 tcg_target_call_iarg_regs[loc->arg_slot]); 4076 } 4077 break; 4078 default: 4079 break; 4080 } 4081 } 4082 } 4083 break; 4084 case INDEX_op_insn_start: 4085 assert_carry_dead(s); 4086 break; 4087 case INDEX_op_discard: 4088 /* mark the temporary as dead */ 4089 ts = arg_temp(op->args[0]); 4090 ts->state = TS_DEAD; 4091 la_reset_pref(ts); 4092 break; 4093 4094 case INDEX_op_muls2: 4095 opc_new = INDEX_op_mul; 4096 opc_new2 = INDEX_op_mulsh; 4097 goto do_mul2; 4098 case INDEX_op_mulu2: 4099 opc_new = INDEX_op_mul; 4100 opc_new2 = INDEX_op_muluh; 4101 do_mul2: 4102 assert_carry_dead(s); 4103 if (arg_temp(op->args[1])->state == TS_DEAD) { 4104 if (arg_temp(op->args[0])->state == TS_DEAD) { 4105 /* Both parts of the operation are dead. */ 4106 goto do_remove; 4107 } 4108 /* The high part of the operation is dead; generate the low. */ 4109 op->opc = opc = opc_new; 4110 op->args[1] = op->args[2]; 4111 op->args[2] = op->args[3]; 4112 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4113 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4114 /* The low part of the operation is dead; generate the high. */ 4115 op->opc = opc = opc_new2; 4116 op->args[0] = op->args[1]; 4117 op->args[1] = op->args[2]; 4118 op->args[2] = op->args[3]; 4119 } else { 4120 goto do_not_remove; 4121 } 4122 /* Mark the single-word operation live. */ 4123 goto do_not_remove; 4124 4125 case INDEX_op_addco: 4126 if (s->carry_live) { 4127 goto do_not_remove; 4128 } 4129 op->opc = opc = INDEX_op_add; 4130 goto do_default; 4131 4132 case INDEX_op_addcio: 4133 if (s->carry_live) { 4134 goto do_not_remove; 4135 } 4136 op->opc = opc = INDEX_op_addci; 4137 goto do_default; 4138 4139 case INDEX_op_subbo: 4140 if (s->carry_live) { 4141 goto do_not_remove; 4142 } 4143 /* Lower to sub, but this may also require canonicalization. */ 4144 op->opc = opc = INDEX_op_sub; 4145 ts = arg_temp(op->args[2]); 4146 if (ts->kind == TEMP_CONST) { 4147 ts = tcg_constant_internal(ts->type, -ts->val); 4148 if (ts->state_ptr == NULL) { 4149 tcg_debug_assert(temp_idx(ts) == nb_temps); 4150 nb_temps++; 4151 ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); 4152 ts->state = TS_DEAD; 4153 la_reset_pref(ts); 4154 } 4155 op->args[2] = temp_arg(ts); 4156 op->opc = opc = INDEX_op_add; 4157 } 4158 goto do_default; 4159 4160 case INDEX_op_subbio: 4161 if (s->carry_live) { 4162 goto do_not_remove; 4163 } 4164 op->opc = opc = INDEX_op_subbi; 4165 goto do_default; 4166 4167 case INDEX_op_addc1o: 4168 if (s->carry_live) { 4169 goto do_not_remove; 4170 } 4171 /* Lower to add, add +1. */ 4172 op_prev = tcg_op_insert_before(s, op, INDEX_op_add, 4173 TCGOP_TYPE(op), 3); 4174 op_prev->args[0] = op->args[0]; 4175 op_prev->args[1] = op->args[1]; 4176 op_prev->args[2] = op->args[2]; 4177 op->opc = opc = INDEX_op_add; 4178 op->args[1] = op->args[0]; 4179 ts = arg_temp(op->args[0]); 4180 ts = tcg_constant_internal(ts->type, 1); 4181 op->args[2] = temp_arg(ts); 4182 goto do_default; 4183 4184 case INDEX_op_subb1o: 4185 if (s->carry_live) { 4186 goto do_not_remove; 4187 } 4188 /* Lower to sub, add -1. 
*/ 4189 op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, 4190 TCGOP_TYPE(op), 3); 4191 op_prev->args[0] = op->args[0]; 4192 op_prev->args[1] = op->args[1]; 4193 op_prev->args[2] = op->args[2]; 4194 op->opc = opc = INDEX_op_add; 4195 op->args[1] = op->args[0]; 4196 ts = arg_temp(op->args[0]); 4197 ts = tcg_constant_internal(ts->type, -1); 4198 op->args[2] = temp_arg(ts); 4199 goto do_default; 4200 4201 default: 4202 do_default: 4203 /* 4204 * Test if the operation can be removed because all 4205 * its outputs are dead. We assume that nb_oargs == 0 4206 * implies side effects. 4207 */ 4208 def = &tcg_op_defs[opc]; 4209 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { 4210 for (int i = def->nb_oargs - 1; i >= 0; i--) { 4211 if (arg_temp(op->args[i])->state != TS_DEAD) { 4212 goto do_not_remove; 4213 } 4214 } 4215 goto do_remove; 4216 } 4217 goto do_not_remove; 4218 4219 do_remove: 4220 tcg_op_remove(s, op); 4221 break; 4222 4223 do_not_remove: 4224 def = &tcg_op_defs[opc]; 4225 nb_iargs = def->nb_iargs; 4226 nb_oargs = def->nb_oargs; 4227 4228 for (int i = 0; i < nb_oargs; i++) { 4229 ts = arg_temp(op->args[i]); 4230 4231 /* Remember the preference of the uses that followed. */ 4232 if (i < ARRAY_SIZE(op->output_pref)) { 4233 op->output_pref[i] = *la_temp_pref(ts); 4234 } 4235 4236 /* Output args are dead. */ 4237 if (ts->state & TS_DEAD) { 4238 arg_life |= DEAD_ARG << i; 4239 } 4240 if (ts->state & TS_MEM) { 4241 arg_life |= SYNC_ARG << i; 4242 } 4243 ts->state = TS_DEAD; 4244 la_reset_pref(ts); 4245 } 4246 4247 /* If end of basic block, update. */ 4248 if (def->flags & TCG_OPF_BB_EXIT) { 4249 assert_carry_dead(s); 4250 la_func_end(s, nb_globals, nb_temps); 4251 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4252 assert_carry_dead(s); 4253 la_bb_sync(s, nb_globals, nb_temps); 4254 } else if (def->flags & TCG_OPF_BB_END) { 4255 assert_carry_dead(s); 4256 la_bb_end(s, nb_globals, nb_temps); 4257 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4258 assert_carry_dead(s); 4259 la_global_sync(s, nb_globals); 4260 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4261 la_cross_call(s, nb_temps); 4262 } 4263 } 4264 4265 /* Record arguments that die in this opcode. */ 4266 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4267 ts = arg_temp(op->args[i]); 4268 if (ts->state & TS_DEAD) { 4269 arg_life |= DEAD_ARG << i; 4270 } 4271 } 4272 if (def->flags & TCG_OPF_CARRY_OUT) { 4273 s->carry_live = false; 4274 } 4275 4276 /* Input arguments are live for preceding opcodes. */ 4277 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4278 ts = arg_temp(op->args[i]); 4279 if (ts->state & TS_DEAD) { 4280 /* For operands that were dead, initially allow 4281 all regs for the type. */ 4282 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4283 ts->state &= ~TS_DEAD; 4284 } 4285 } 4286 if (def->flags & TCG_OPF_CARRY_IN) { 4287 s->carry_live = true; 4288 } 4289 4290 /* Incorporate constraints for this operand. */ 4291 switch (opc) { 4292 case INDEX_op_mov: 4293 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4294 have proper constraints. That said, special case 4295 moves to propagate preferences backward. 
*/ 4296 if (IS_DEAD_ARG(1)) { 4297 *la_temp_pref(arg_temp(op->args[0])) 4298 = *la_temp_pref(arg_temp(op->args[1])); 4299 } 4300 break; 4301 4302 default: 4303 args_ct = opcode_args_ct(op); 4304 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4305 const TCGArgConstraint *ct = &args_ct[i]; 4306 TCGRegSet set, *pset; 4307 4308 ts = arg_temp(op->args[i]); 4309 pset = la_temp_pref(ts); 4310 set = *pset; 4311 4312 set &= ct->regs; 4313 if (ct->ialias) { 4314 set &= output_pref(op, ct->alias_index); 4315 } 4316 /* If the combination is not possible, restart. */ 4317 if (set == 0) { 4318 set = ct->regs; 4319 } 4320 *pset = set; 4321 } 4322 break; 4323 } 4324 break; 4325 } 4326 op->life = arg_life; 4327 } 4328 assert_carry_dead(s); 4329 } 4330 4331 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4332 static bool __attribute__((noinline)) 4333 liveness_pass_2(TCGContext *s) 4334 { 4335 int nb_globals = s->nb_globals; 4336 int nb_temps, i; 4337 bool changes = false; 4338 TCGOp *op, *op_next; 4339 4340 /* Create a temporary for each indirect global. */ 4341 for (i = 0; i < nb_globals; ++i) { 4342 TCGTemp *its = &s->temps[i]; 4343 if (its->indirect_reg) { 4344 TCGTemp *dts = tcg_temp_alloc(s); 4345 dts->type = its->type; 4346 dts->base_type = its->base_type; 4347 dts->temp_subindex = its->temp_subindex; 4348 dts->kind = TEMP_EBB; 4349 its->state_ptr = dts; 4350 } else { 4351 its->state_ptr = NULL; 4352 } 4353 /* All globals begin dead. */ 4354 its->state = TS_DEAD; 4355 } 4356 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4357 TCGTemp *its = &s->temps[i]; 4358 its->state_ptr = NULL; 4359 its->state = TS_DEAD; 4360 } 4361 4362 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4363 TCGOpcode opc = op->opc; 4364 const TCGOpDef *def = &tcg_op_defs[opc]; 4365 TCGLifeData arg_life = op->life; 4366 int nb_iargs, nb_oargs, call_flags; 4367 TCGTemp *arg_ts, *dir_ts; 4368 4369 if (opc == INDEX_op_call) { 4370 nb_oargs = TCGOP_CALLO(op); 4371 nb_iargs = TCGOP_CALLI(op); 4372 call_flags = tcg_call_flags(op); 4373 } else { 4374 nb_iargs = def->nb_iargs; 4375 nb_oargs = def->nb_oargs; 4376 4377 /* Set flags similar to how calls require. */ 4378 if (def->flags & TCG_OPF_COND_BRANCH) { 4379 /* Like reading globals: sync_globals */ 4380 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4381 } else if (def->flags & TCG_OPF_BB_END) { 4382 /* Like writing globals: save_globals */ 4383 call_flags = 0; 4384 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4385 /* Like reading globals: sync_globals */ 4386 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4387 } else { 4388 /* No effect on globals. */ 4389 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4390 TCG_CALL_NO_WRITE_GLOBALS); 4391 } 4392 } 4393 4394 /* Make sure that input arguments are available. */ 4395 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4396 arg_ts = arg_temp(op->args[i]); 4397 dir_ts = arg_ts->state_ptr; 4398 if (dir_ts && arg_ts->state == TS_DEAD) { 4399 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4400 ? INDEX_op_ld_i32 4401 : INDEX_op_ld_i64); 4402 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4403 arg_ts->type, 3); 4404 4405 lop->args[0] = temp_arg(dir_ts); 4406 lop->args[1] = temp_arg(arg_ts->mem_base); 4407 lop->args[2] = arg_ts->mem_offset; 4408 4409 /* Loaded, but synced with memory. */ 4410 arg_ts->state = TS_MEM; 4411 } 4412 } 4413 4414 /* Perform input replacement, and mark inputs that became dead. 4415 No action is required except keeping temp_state up to date 4416 so that we reload when needed. 
*/ 4417 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4418 arg_ts = arg_temp(op->args[i]); 4419 dir_ts = arg_ts->state_ptr; 4420 if (dir_ts) { 4421 op->args[i] = temp_arg(dir_ts); 4422 changes = true; 4423 if (IS_DEAD_ARG(i)) { 4424 arg_ts->state = TS_DEAD; 4425 } 4426 } 4427 } 4428 4429 /* Liveness analysis should ensure that the following are 4430 all correct, for call sites and basic block end points. */ 4431 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4432 /* Nothing to do */ 4433 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4434 for (i = 0; i < nb_globals; ++i) { 4435 /* Liveness should see that globals are synced back, 4436 that is, either TS_DEAD or TS_MEM. */ 4437 arg_ts = &s->temps[i]; 4438 tcg_debug_assert(arg_ts->state_ptr == 0 4439 || arg_ts->state != 0); 4440 } 4441 } else { 4442 for (i = 0; i < nb_globals; ++i) { 4443 /* Liveness should see that globals are saved back, 4444 that is, TS_DEAD, waiting to be reloaded. */ 4445 arg_ts = &s->temps[i]; 4446 tcg_debug_assert(arg_ts->state_ptr == 0 4447 || arg_ts->state == TS_DEAD); 4448 } 4449 } 4450 4451 /* Outputs become available. */ 4452 if (opc == INDEX_op_mov) { 4453 arg_ts = arg_temp(op->args[0]); 4454 dir_ts = arg_ts->state_ptr; 4455 if (dir_ts) { 4456 op->args[0] = temp_arg(dir_ts); 4457 changes = true; 4458 4459 /* The output is now live and modified. */ 4460 arg_ts->state = 0; 4461 4462 if (NEED_SYNC_ARG(0)) { 4463 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4464 ? INDEX_op_st_i32 4465 : INDEX_op_st_i64); 4466 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4467 arg_ts->type, 3); 4468 TCGTemp *out_ts = dir_ts; 4469 4470 if (IS_DEAD_ARG(0)) { 4471 out_ts = arg_temp(op->args[1]); 4472 arg_ts->state = TS_DEAD; 4473 tcg_op_remove(s, op); 4474 } else { 4475 arg_ts->state = TS_MEM; 4476 } 4477 4478 sop->args[0] = temp_arg(out_ts); 4479 sop->args[1] = temp_arg(arg_ts->mem_base); 4480 sop->args[2] = arg_ts->mem_offset; 4481 } else { 4482 tcg_debug_assert(!IS_DEAD_ARG(0)); 4483 } 4484 } 4485 } else { 4486 for (i = 0; i < nb_oargs; i++) { 4487 arg_ts = arg_temp(op->args[i]); 4488 dir_ts = arg_ts->state_ptr; 4489 if (!dir_ts) { 4490 continue; 4491 } 4492 op->args[i] = temp_arg(dir_ts); 4493 changes = true; 4494 4495 /* The output is now live and modified. */ 4496 arg_ts->state = 0; 4497 4498 /* Sync outputs upon their last write. */ 4499 if (NEED_SYNC_ARG(i)) { 4500 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4501 ? INDEX_op_st_i32 4502 : INDEX_op_st_i64); 4503 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4504 arg_ts->type, 3); 4505 4506 sop->args[0] = temp_arg(dir_ts); 4507 sop->args[1] = temp_arg(arg_ts->mem_base); 4508 sop->args[2] = arg_ts->mem_offset; 4509 4510 arg_ts->state = TS_MEM; 4511 } 4512 /* Drop outputs that are dead. */ 4513 if (IS_DEAD_ARG(i)) { 4514 arg_ts->state = TS_DEAD; 4515 } 4516 } 4517 } 4518 } 4519 4520 return changes; 4521 } 4522 4523 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4524 { 4525 intptr_t off; 4526 int size, align; 4527 4528 /* When allocating an object, look at the full type. */ 4529 size = tcg_type_size(ts->base_type); 4530 switch (ts->base_type) { 4531 case TCG_TYPE_I32: 4532 align = 4; 4533 break; 4534 case TCG_TYPE_I64: 4535 case TCG_TYPE_V64: 4536 align = 8; 4537 break; 4538 case TCG_TYPE_I128: 4539 case TCG_TYPE_V128: 4540 case TCG_TYPE_V256: 4541 /* 4542 * Note that we do not require aligned storage for V256, 4543 * and that we provide alignment for I128 to match V128, 4544 * even if that's above what the host ABI requires. 
4545 */ 4546 align = 16; 4547 break; 4548 default: 4549 g_assert_not_reached(); 4550 } 4551 4552 /* 4553 * Assume the stack is sufficiently aligned. 4554 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4555 * and do not require 16 byte vector alignment. This seems slightly 4556 * easier than fully parameterizing the above switch statement. 4557 */ 4558 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4559 off = ROUND_UP(s->current_frame_offset, align); 4560 4561 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4562 if (off + size > s->frame_end) { 4563 tcg_raise_tb_overflow(s); 4564 } 4565 s->current_frame_offset = off + size; 4566 #if defined(__sparc__) 4567 off += TCG_TARGET_STACK_BIAS; 4568 #endif 4569 4570 /* If the object was subdivided, assign memory to all the parts. */ 4571 if (ts->base_type != ts->type) { 4572 int part_size = tcg_type_size(ts->type); 4573 int part_count = size / part_size; 4574 4575 /* 4576 * Each part is allocated sequentially in tcg_temp_new_internal. 4577 * Jump back to the first part by subtracting the current index. 4578 */ 4579 ts -= ts->temp_subindex; 4580 for (int i = 0; i < part_count; ++i) { 4581 ts[i].mem_offset = off + i * part_size; 4582 ts[i].mem_base = s->frame_temp; 4583 ts[i].mem_allocated = 1; 4584 } 4585 } else { 4586 ts->mem_offset = off; 4587 ts->mem_base = s->frame_temp; 4588 ts->mem_allocated = 1; 4589 } 4590 } 4591 4592 /* Assign @reg to @ts, and update reg_to_temp[]. */ 4593 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4594 { 4595 if (ts->val_type == TEMP_VAL_REG) { 4596 TCGReg old = ts->reg; 4597 tcg_debug_assert(s->reg_to_temp[old] == ts); 4598 if (old == reg) { 4599 return; 4600 } 4601 s->reg_to_temp[old] = NULL; 4602 } 4603 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4604 s->reg_to_temp[reg] = ts; 4605 ts->val_type = TEMP_VAL_REG; 4606 ts->reg = reg; 4607 } 4608 4609 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4610 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4611 { 4612 tcg_debug_assert(type != TEMP_VAL_REG); 4613 if (ts->val_type == TEMP_VAL_REG) { 4614 TCGReg reg = ts->reg; 4615 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4616 s->reg_to_temp[reg] = NULL; 4617 } 4618 ts->val_type = type; 4619 } 4620 4621 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4622 4623 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4624 mark it free; otherwise mark it dead. */ 4625 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4626 { 4627 TCGTempVal new_type; 4628 4629 switch (ts->kind) { 4630 case TEMP_FIXED: 4631 return; 4632 case TEMP_GLOBAL: 4633 case TEMP_TB: 4634 new_type = TEMP_VAL_MEM; 4635 break; 4636 case TEMP_EBB: 4637 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4638 break; 4639 case TEMP_CONST: 4640 new_type = TEMP_VAL_CONST; 4641 break; 4642 default: 4643 g_assert_not_reached(); 4644 } 4645 set_temp_val_nonreg(s, ts, new_type); 4646 } 4647 4648 /* Mark a temporary as dead. */ 4649 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4650 { 4651 temp_free_or_dead(s, ts, 1); 4652 } 4653 4654 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4655 registers needs to be allocated to store a constant. If 'free_or_dead' 4656 is non-zero, subsequently release the temporary; if it is positive, the 4657 temp is dead; if it is negative, the temp is free. 
*/ 4658 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4659 TCGRegSet preferred_regs, int free_or_dead) 4660 { 4661 if (!temp_readonly(ts) && !ts->mem_coherent) { 4662 if (!ts->mem_allocated) { 4663 temp_allocate_frame(s, ts); 4664 } 4665 switch (ts->val_type) { 4666 case TEMP_VAL_CONST: 4667 /* If we're going to free the temp immediately, then we won't 4668 require it later in a register, so attempt to store the 4669 constant to memory directly. */ 4670 if (free_or_dead 4671 && tcg_out_sti(s, ts->type, ts->val, 4672 ts->mem_base->reg, ts->mem_offset)) { 4673 break; 4674 } 4675 temp_load(s, ts, tcg_target_available_regs[ts->type], 4676 allocated_regs, preferred_regs); 4677 /* fallthrough */ 4678 4679 case TEMP_VAL_REG: 4680 tcg_out_st(s, ts->type, ts->reg, 4681 ts->mem_base->reg, ts->mem_offset); 4682 break; 4683 4684 case TEMP_VAL_MEM: 4685 break; 4686 4687 case TEMP_VAL_DEAD: 4688 default: 4689 g_assert_not_reached(); 4690 } 4691 ts->mem_coherent = 1; 4692 } 4693 if (free_or_dead) { 4694 temp_free_or_dead(s, ts, free_or_dead); 4695 } 4696 } 4697 4698 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4699 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4700 { 4701 TCGTemp *ts = s->reg_to_temp[reg]; 4702 if (ts != NULL) { 4703 temp_sync(s, ts, allocated_regs, 0, -1); 4704 } 4705 } 4706 4707 /** 4708 * tcg_reg_alloc: 4709 * @required_regs: Set of registers in which we must allocate. 4710 * @allocated_regs: Set of registers which must be avoided. 4711 * @preferred_regs: Set of registers we should prefer. 4712 * @rev: True if we search the registers in "indirect" order. 4713 * 4714 * The allocated register must be in @required_regs & ~@allocated_regs, 4715 * but if we can put it in @preferred_regs we may save a move later. 4716 */ 4717 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4718 TCGRegSet allocated_regs, 4719 TCGRegSet preferred_regs, bool rev) 4720 { 4721 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4722 TCGRegSet reg_ct[2]; 4723 const int *order; 4724 4725 reg_ct[1] = required_regs & ~allocated_regs; 4726 tcg_debug_assert(reg_ct[1] != 0); 4727 reg_ct[0] = reg_ct[1] & preferred_regs; 4728 4729 /* Skip the preferred_regs option if it cannot be satisfied, 4730 or if the preference made no difference. */ 4731 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4732 4733 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4734 4735 /* Try free registers, preferences first. */ 4736 for (j = f; j < 2; j++) { 4737 TCGRegSet set = reg_ct[j]; 4738 4739 if (tcg_regset_single(set)) { 4740 /* One register in the set. */ 4741 TCGReg reg = tcg_regset_first(set); 4742 if (s->reg_to_temp[reg] == NULL) { 4743 return reg; 4744 } 4745 } else { 4746 for (i = 0; i < n; i++) { 4747 TCGReg reg = order[i]; 4748 if (s->reg_to_temp[reg] == NULL && 4749 tcg_regset_test_reg(set, reg)) { 4750 return reg; 4751 } 4752 } 4753 } 4754 } 4755 4756 /* We must spill something. */ 4757 for (j = f; j < 2; j++) { 4758 TCGRegSet set = reg_ct[j]; 4759 4760 if (tcg_regset_single(set)) { 4761 /* One register in the set. 
*/ 4762 TCGReg reg = tcg_regset_first(set); 4763 tcg_reg_free(s, reg, allocated_regs); 4764 return reg; 4765 } else { 4766 for (i = 0; i < n; i++) { 4767 TCGReg reg = order[i]; 4768 if (tcg_regset_test_reg(set, reg)) { 4769 tcg_reg_free(s, reg, allocated_regs); 4770 return reg; 4771 } 4772 } 4773 } 4774 } 4775 4776 g_assert_not_reached(); 4777 } 4778 4779 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4780 TCGRegSet allocated_regs, 4781 TCGRegSet preferred_regs, bool rev) 4782 { 4783 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4784 TCGRegSet reg_ct[2]; 4785 const int *order; 4786 4787 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4788 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4789 tcg_debug_assert(reg_ct[1] != 0); 4790 reg_ct[0] = reg_ct[1] & preferred_regs; 4791 4792 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4793 4794 /* 4795 * Skip the preferred_regs option if it cannot be satisfied, 4796 * or if the preference made no difference. 4797 */ 4798 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4799 4800 /* 4801 * Minimize the number of flushes by looking for 2 free registers first, 4802 * then a single flush, then two flushes. 4803 */ 4804 for (fmin = 2; fmin >= 0; fmin--) { 4805 for (j = k; j < 2; j++) { 4806 TCGRegSet set = reg_ct[j]; 4807 4808 for (i = 0; i < n; i++) { 4809 TCGReg reg = order[i]; 4810 4811 if (tcg_regset_test_reg(set, reg)) { 4812 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4813 if (f >= fmin) { 4814 tcg_reg_free(s, reg, allocated_regs); 4815 tcg_reg_free(s, reg + 1, allocated_regs); 4816 return reg; 4817 } 4818 } 4819 } 4820 } 4821 } 4822 g_assert_not_reached(); 4823 } 4824 4825 /* Make sure the temporary is in a register. If needed, allocate the register 4826 from DESIRED while avoiding ALLOCATED. */ 4827 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4828 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4829 { 4830 TCGReg reg; 4831 4832 switch (ts->val_type) { 4833 case TEMP_VAL_REG: 4834 return; 4835 case TEMP_VAL_CONST: 4836 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4837 preferred_regs, ts->indirect_base); 4838 if (ts->type <= TCG_TYPE_I64) { 4839 tcg_out_movi(s, ts->type, reg, ts->val); 4840 } else { 4841 uint64_t val = ts->val; 4842 MemOp vece = MO_64; 4843 4844 /* 4845 * Find the minimal vector element that matches the constant. 4846 * The targets will, in general, have to do this search anyway, 4847 * do this generically. 4848 */ 4849 if (val == dup_const(MO_8, val)) { 4850 vece = MO_8; 4851 } else if (val == dup_const(MO_16, val)) { 4852 vece = MO_16; 4853 } else if (val == dup_const(MO_32, val)) { 4854 vece = MO_32; 4855 } 4856 4857 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4858 } 4859 ts->mem_coherent = 0; 4860 break; 4861 case TEMP_VAL_MEM: 4862 if (!ts->mem_allocated) { 4863 temp_allocate_frame(s, ts); 4864 } 4865 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4866 preferred_regs, ts->indirect_base); 4867 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4868 ts->mem_coherent = 1; 4869 break; 4870 case TEMP_VAL_DEAD: 4871 default: 4872 g_assert_not_reached(); 4873 } 4874 set_temp_val_reg(s, ts, reg); 4875 } 4876 4877 /* Save a temporary to memory. 'allocated_regs' is used in case a 4878 temporary registers needs to be allocated to store a constant. 
*/ 4879 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4880 { 4881 /* The liveness analysis already ensures that globals are back 4882 in memory. Keep a tcg_debug_assert for safety. */ 4883 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4884 } 4885 4886 /* save globals to their canonical location and assume they can be 4887 modified by the following code. 'allocated_regs' is used in case a 4888 temporary register needs to be allocated to store a constant. */ 4889 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4890 { 4891 int i, n; 4892 4893 for (i = 0, n = s->nb_globals; i < n; i++) { 4894 temp_save(s, &s->temps[i], allocated_regs); 4895 } 4896 } 4897 4898 /* sync globals to their canonical location and assume they can be 4899 read by the following code. 'allocated_regs' is used in case a 4900 temporary register needs to be allocated to store a constant. */ 4901 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4902 { 4903 int i, n; 4904 4905 for (i = 0, n = s->nb_globals; i < n; i++) { 4906 TCGTemp *ts = &s->temps[i]; 4907 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4908 || ts->kind == TEMP_FIXED 4909 || ts->mem_coherent); 4910 } 4911 } 4912 4913 /* at the end of a basic block, we assume all temporaries are dead and 4914 all globals are stored at their canonical location. */ 4915 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4916 { 4917 assert_carry_dead(s); 4918 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4919 TCGTemp *ts = &s->temps[i]; 4920 4921 switch (ts->kind) { 4922 case TEMP_TB: 4923 temp_save(s, ts, allocated_regs); 4924 break; 4925 case TEMP_EBB: 4926 /* The liveness analysis already ensures that temps are dead. 4927 Keep a tcg_debug_assert for safety. */ 4928 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4929 break; 4930 case TEMP_CONST: 4931 /* Similarly, we should have freed any allocated register. */ 4932 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4933 break; 4934 default: 4935 g_assert_not_reached(); 4936 } 4937 } 4938 4939 save_globals(s, allocated_regs); 4940 } 4941 4942 /* 4943 * At a conditional branch, we assume all temporaries are dead unless 4944 * explicitly live-across-conditional-branch; all globals and local 4945 * temps are synced to their location. 4946 */ 4947 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4948 { 4949 assert_carry_dead(s); 4950 sync_globals(s, allocated_regs); 4951 4952 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4953 TCGTemp *ts = &s->temps[i]; 4954 /* 4955 * The liveness analysis already ensures that temps are dead. 4956 * Keep tcg_debug_asserts for safety. 4957 */ 4958 switch (ts->kind) { 4959 case TEMP_TB: 4960 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4961 break; 4962 case TEMP_EBB: 4963 case TEMP_CONST: 4964 break; 4965 default: 4966 g_assert_not_reached(); 4967 } 4968 } 4969 } 4970 4971 /* 4972 * Specialized code generation for INDEX_op_mov_* with a constant. 4973 */ 4974 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4975 tcg_target_ulong val, TCGLifeData arg_life, 4976 TCGRegSet preferred_regs) 4977 { 4978 /* ENV should not be modified. */ 4979 tcg_debug_assert(!temp_readonly(ots)); 4980 4981 /* The movi is not explicitly generated here.
*/ 4982 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4983 ots->val = val; 4984 ots->mem_coherent = 0; 4985 if (NEED_SYNC_ARG(0)) { 4986 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4987 } else if (IS_DEAD_ARG(0)) { 4988 temp_dead(s, ots); 4989 } 4990 } 4991 4992 /* 4993 * Specialized code generation for INDEX_op_mov_*. 4994 */ 4995 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4996 { 4997 const TCGLifeData arg_life = op->life; 4998 TCGRegSet allocated_regs, preferred_regs; 4999 TCGTemp *ts, *ots; 5000 TCGType otype, itype; 5001 TCGReg oreg, ireg; 5002 5003 allocated_regs = s->reserved_regs; 5004 preferred_regs = output_pref(op, 0); 5005 ots = arg_temp(op->args[0]); 5006 ts = arg_temp(op->args[1]); 5007 5008 /* ENV should not be modified. */ 5009 tcg_debug_assert(!temp_readonly(ots)); 5010 5011 /* Note that otype != itype for no-op truncation. */ 5012 otype = ots->type; 5013 itype = ts->type; 5014 5015 if (ts->val_type == TEMP_VAL_CONST) { 5016 /* propagate constant or generate sti */ 5017 tcg_target_ulong val = ts->val; 5018 if (IS_DEAD_ARG(1)) { 5019 temp_dead(s, ts); 5020 } 5021 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 5022 return; 5023 } 5024 5025 /* If the source value is in memory we're going to be forced 5026 to have it in a register in order to perform the copy. Copy 5027 the SOURCE value into its own register first, that way we 5028 don't have to reload SOURCE the next time it is used. */ 5029 if (ts->val_type == TEMP_VAL_MEM) { 5030 temp_load(s, ts, tcg_target_available_regs[itype], 5031 allocated_regs, preferred_regs); 5032 } 5033 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 5034 ireg = ts->reg; 5035 5036 if (IS_DEAD_ARG(0)) { 5037 /* mov to a non-saved dead register makes no sense (even with 5038 liveness analysis disabled). */ 5039 tcg_debug_assert(NEED_SYNC_ARG(0)); 5040 if (!ots->mem_allocated) { 5041 temp_allocate_frame(s, ots); 5042 } 5043 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 5044 if (IS_DEAD_ARG(1)) { 5045 temp_dead(s, ts); 5046 } 5047 temp_dead(s, ots); 5048 return; 5049 } 5050 5051 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 5052 /* 5053 * The mov can be suppressed. Kill input first, so that it 5054 * is unlinked from reg_to_temp, then set the output to the 5055 * reg that we saved from the input. 5056 */ 5057 temp_dead(s, ts); 5058 oreg = ireg; 5059 } else { 5060 if (ots->val_type == TEMP_VAL_REG) { 5061 oreg = ots->reg; 5062 } else { 5063 /* Make sure to not spill the input register during allocation. */ 5064 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 5065 allocated_regs | ((TCGRegSet)1 << ireg), 5066 preferred_regs, ots->indirect_base); 5067 } 5068 if (!tcg_out_mov(s, otype, oreg, ireg)) { 5069 /* 5070 * Cross register class move not supported. 5071 * Store the source register into the destination slot 5072 * and leave the destination temp as TEMP_VAL_MEM. 5073 */ 5074 assert(!temp_readonly(ots)); 5075 if (!ts->mem_allocated) { 5076 temp_allocate_frame(s, ots); 5077 } 5078 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 5079 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 5080 ots->mem_coherent = 1; 5081 return; 5082 } 5083 } 5084 set_temp_val_reg(s, ots, oreg); 5085 ots->mem_coherent = 0; 5086 5087 if (NEED_SYNC_ARG(0)) { 5088 temp_sync(s, ots, allocated_regs, 0, 0); 5089 } 5090 } 5091 5092 /* 5093 * Specialized code generation for INDEX_op_dup_vec. 
5094 */ 5095 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 5096 { 5097 const TCGLifeData arg_life = op->life; 5098 TCGRegSet dup_out_regs, dup_in_regs; 5099 const TCGArgConstraint *dup_args_ct; 5100 TCGTemp *its, *ots; 5101 TCGType itype, vtype; 5102 unsigned vece; 5103 int lowpart_ofs; 5104 bool ok; 5105 5106 ots = arg_temp(op->args[0]); 5107 its = arg_temp(op->args[1]); 5108 5109 /* ENV should not be modified. */ 5110 tcg_debug_assert(!temp_readonly(ots)); 5111 5112 itype = its->type; 5113 vece = TCGOP_VECE(op); 5114 vtype = TCGOP_TYPE(op); 5115 5116 if (its->val_type == TEMP_VAL_CONST) { 5117 /* Propagate constant via movi -> dupi. */ 5118 tcg_target_ulong val = its->val; 5119 if (IS_DEAD_ARG(1)) { 5120 temp_dead(s, its); 5121 } 5122 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 5123 return; 5124 } 5125 5126 dup_args_ct = opcode_args_ct(op); 5127 dup_out_regs = dup_args_ct[0].regs; 5128 dup_in_regs = dup_args_ct[1].regs; 5129 5130 /* Allocate the output register now. */ 5131 if (ots->val_type != TEMP_VAL_REG) { 5132 TCGRegSet allocated_regs = s->reserved_regs; 5133 TCGReg oreg; 5134 5135 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 5136 /* Make sure to not spill the input register. */ 5137 tcg_regset_set_reg(allocated_regs, its->reg); 5138 } 5139 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5140 output_pref(op, 0), ots->indirect_base); 5141 set_temp_val_reg(s, ots, oreg); 5142 } 5143 5144 switch (its->val_type) { 5145 case TEMP_VAL_REG: 5146 /* 5147 * The dup constraints must be broad, covering all possible VECE. 5148 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 5149 * to fail, indicating that extra moves are required for that case. 5150 */ 5151 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 5152 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 5153 goto done; 5154 } 5155 /* Try again from memory or a vector input register. */ 5156 } 5157 if (!its->mem_coherent) { 5158 /* 5159 * The input register is not synced, and so an extra store 5160 * would be required to use memory. Attempt an integer-vector 5161 * register move first. We do not have a TCGRegSet for this. 5162 */ 5163 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 5164 break; 5165 } 5166 /* Sync the temp back to its slot and load from there. */ 5167 temp_sync(s, its, s->reserved_regs, 0, 0); 5168 } 5169 /* fall through */ 5170 5171 case TEMP_VAL_MEM: 5172 lowpart_ofs = 0; 5173 if (HOST_BIG_ENDIAN) { 5174 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5175 } 5176 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5177 its->mem_offset + lowpart_ofs)) { 5178 goto done; 5179 } 5180 /* Load the input into the destination vector register. */ 5181 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5182 break; 5183 5184 default: 5185 g_assert_not_reached(); 5186 } 5187 5188 /* We now have a vector input register, so dup must succeed.
*/ 5189 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5190 tcg_debug_assert(ok); 5191 5192 done: 5193 ots->mem_coherent = 0; 5194 if (IS_DEAD_ARG(1)) { 5195 temp_dead(s, its); 5196 } 5197 if (NEED_SYNC_ARG(0)) { 5198 temp_sync(s, ots, s->reserved_regs, 0, 0); 5199 } 5200 if (IS_DEAD_ARG(0)) { 5201 temp_dead(s, ots); 5202 } 5203 } 5204 5205 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5206 { 5207 const TCGLifeData arg_life = op->life; 5208 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5209 TCGRegSet i_allocated_regs; 5210 TCGRegSet o_allocated_regs; 5211 int i, k, nb_iargs, nb_oargs; 5212 TCGReg reg; 5213 TCGArg arg; 5214 const TCGArgConstraint *args_ct; 5215 const TCGArgConstraint *arg_ct; 5216 TCGTemp *ts; 5217 TCGArg new_args[TCG_MAX_OP_ARGS]; 5218 int const_args[TCG_MAX_OP_ARGS]; 5219 TCGCond op_cond; 5220 5221 if (def->flags & TCG_OPF_CARRY_IN) { 5222 tcg_debug_assert(s->carry_live); 5223 } 5224 5225 nb_oargs = def->nb_oargs; 5226 nb_iargs = def->nb_iargs; 5227 5228 /* copy constants */ 5229 memcpy(new_args + nb_oargs + nb_iargs, 5230 op->args + nb_oargs + nb_iargs, 5231 sizeof(TCGArg) * def->nb_cargs); 5232 5233 i_allocated_regs = s->reserved_regs; 5234 o_allocated_regs = s->reserved_regs; 5235 5236 switch (op->opc) { 5237 case INDEX_op_brcond: 5238 op_cond = op->args[2]; 5239 break; 5240 case INDEX_op_setcond: 5241 case INDEX_op_negsetcond: 5242 case INDEX_op_cmp_vec: 5243 op_cond = op->args[3]; 5244 break; 5245 case INDEX_op_brcond2_i32: 5246 op_cond = op->args[4]; 5247 break; 5248 case INDEX_op_movcond: 5249 case INDEX_op_setcond2_i32: 5250 case INDEX_op_cmpsel_vec: 5251 op_cond = op->args[5]; 5252 break; 5253 default: 5254 /* No condition within opcode. */ 5255 op_cond = TCG_COND_ALWAYS; 5256 break; 5257 } 5258 5259 args_ct = opcode_args_ct(op); 5260 5261 /* satisfy input constraints */ 5262 for (k = 0; k < nb_iargs; k++) { 5263 TCGRegSet i_preferred_regs, i_required_regs; 5264 bool allocate_new_reg, copyto_new_reg; 5265 TCGTemp *ts2; 5266 int i1, i2; 5267 5268 i = args_ct[nb_oargs + k].sort_index; 5269 arg = op->args[i]; 5270 arg_ct = &args_ct[i]; 5271 ts = arg_temp(arg); 5272 5273 if (ts->val_type == TEMP_VAL_CONST) { 5274 #ifdef TCG_REG_ZERO 5275 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5276 /* Hardware zero register: indicate register via non-const. */ 5277 const_args[i] = 0; 5278 new_args[i] = TCG_REG_ZERO; 5279 continue; 5280 } 5281 #endif 5282 5283 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5284 op_cond, TCGOP_VECE(op))) { 5285 /* constant is OK for instruction */ 5286 const_args[i] = 1; 5287 new_args[i] = ts->val; 5288 continue; 5289 } 5290 } 5291 5292 reg = ts->reg; 5293 i_preferred_regs = 0; 5294 i_required_regs = arg_ct->regs; 5295 allocate_new_reg = false; 5296 copyto_new_reg = false; 5297 5298 switch (arg_ct->pair) { 5299 case 0: /* not paired */ 5300 if (arg_ct->ialias) { 5301 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5302 5303 /* 5304 * If the input is readonly, then it cannot also be an 5305 * output and aliased to itself. If the input is not 5306 * dead after the instruction, we must allocate a new 5307 * register and move it. 5308 */ 5309 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5310 || args_ct[arg_ct->alias_index].newreg) { 5311 allocate_new_reg = true; 5312 } else if (ts->val_type == TEMP_VAL_REG) { 5313 /* 5314 * Check if the current register has already been 5315 * allocated for another input. 
5316 */ 5317 allocate_new_reg = 5318 tcg_regset_test_reg(i_allocated_regs, reg); 5319 } 5320 } 5321 if (!allocate_new_reg) { 5322 temp_load(s, ts, i_required_regs, i_allocated_regs, 5323 i_preferred_regs); 5324 reg = ts->reg; 5325 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5326 } 5327 if (allocate_new_reg) { 5328 /* 5329 * Allocate a new register matching the constraint 5330 * and move the temporary register into it. 5331 */ 5332 temp_load(s, ts, tcg_target_available_regs[ts->type], 5333 i_allocated_regs, 0); 5334 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5335 i_preferred_regs, ts->indirect_base); 5336 copyto_new_reg = true; 5337 } 5338 break; 5339 5340 case 1: 5341 /* First of an input pair; if i1 == i2, the second is an output. */ 5342 i1 = i; 5343 i2 = arg_ct->pair_index; 5344 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5345 5346 /* 5347 * It is easier to default to allocating a new pair 5348 * and to identify a few cases where it's not required. 5349 */ 5350 if (arg_ct->ialias) { 5351 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5352 if (IS_DEAD_ARG(i1) && 5353 IS_DEAD_ARG(i2) && 5354 !temp_readonly(ts) && 5355 ts->val_type == TEMP_VAL_REG && 5356 ts->reg < TCG_TARGET_NB_REGS - 1 && 5357 tcg_regset_test_reg(i_required_regs, reg) && 5358 !tcg_regset_test_reg(i_allocated_regs, reg) && 5359 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5360 (ts2 5361 ? ts2->val_type == TEMP_VAL_REG && 5362 ts2->reg == reg + 1 && 5363 !temp_readonly(ts2) 5364 : s->reg_to_temp[reg + 1] == NULL)) { 5365 break; 5366 } 5367 } else { 5368 /* Without aliasing, the pair must also be an input. */ 5369 tcg_debug_assert(ts2); 5370 if (ts->val_type == TEMP_VAL_REG && 5371 ts2->val_type == TEMP_VAL_REG && 5372 ts2->reg == reg + 1 && 5373 tcg_regset_test_reg(i_required_regs, reg)) { 5374 break; 5375 } 5376 } 5377 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5378 0, ts->indirect_base); 5379 goto do_pair; 5380 5381 case 2: /* pair second */ 5382 reg = new_args[arg_ct->pair_index] + 1; 5383 goto do_pair; 5384 5385 case 3: /* ialias with second output, no first input */ 5386 tcg_debug_assert(arg_ct->ialias); 5387 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5388 5389 if (IS_DEAD_ARG(i) && 5390 !temp_readonly(ts) && 5391 ts->val_type == TEMP_VAL_REG && 5392 reg > 0 && 5393 s->reg_to_temp[reg - 1] == NULL && 5394 tcg_regset_test_reg(i_required_regs, reg) && 5395 !tcg_regset_test_reg(i_allocated_regs, reg) && 5396 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5397 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5398 break; 5399 } 5400 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5401 i_allocated_regs, 0, 5402 ts->indirect_base); 5403 tcg_regset_set_reg(i_allocated_regs, reg); 5404 reg += 1; 5405 goto do_pair; 5406 5407 do_pair: 5408 /* 5409 * If an aliased input is not dead after the instruction, 5410 * we must allocate a new register and move it. 5411 */ 5412 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5413 TCGRegSet t_allocated_regs = i_allocated_regs; 5414 5415 /* 5416 * Because of the alias, and the continued life, make sure 5417 * that the temp is somewhere *other* than the reg pair, 5418 * and we get a copy in reg. 5419 */ 5420 tcg_regset_set_reg(t_allocated_regs, reg); 5421 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5422 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5423 /* If ts was already in reg, copy it somewhere else. 
*/ 5424 TCGReg nr; 5425 bool ok; 5426 5427 tcg_debug_assert(ts->kind != TEMP_FIXED); 5428 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5429 t_allocated_regs, 0, ts->indirect_base); 5430 ok = tcg_out_mov(s, ts->type, nr, reg); 5431 tcg_debug_assert(ok); 5432 5433 set_temp_val_reg(s, ts, nr); 5434 } else { 5435 temp_load(s, ts, tcg_target_available_regs[ts->type], 5436 t_allocated_regs, 0); 5437 copyto_new_reg = true; 5438 } 5439 } else { 5440 /* Preferably allocate to reg, otherwise copy. */ 5441 i_required_regs = (TCGRegSet)1 << reg; 5442 temp_load(s, ts, i_required_regs, i_allocated_regs, 5443 i_preferred_regs); 5444 copyto_new_reg = ts->reg != reg; 5445 } 5446 break; 5447 5448 default: 5449 g_assert_not_reached(); 5450 } 5451 5452 if (copyto_new_reg) { 5453 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5454 /* 5455 * Cross register class move not supported. Sync the 5456 * temp back to its slot and load from there. 5457 */ 5458 temp_sync(s, ts, i_allocated_regs, 0, 0); 5459 tcg_out_ld(s, ts->type, reg, 5460 ts->mem_base->reg, ts->mem_offset); 5461 } 5462 } 5463 new_args[i] = reg; 5464 const_args[i] = 0; 5465 tcg_regset_set_reg(i_allocated_regs, reg); 5466 } 5467 5468 /* mark dead temporaries and free the associated registers */ 5469 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5470 if (IS_DEAD_ARG(i)) { 5471 temp_dead(s, arg_temp(op->args[i])); 5472 } 5473 } 5474 5475 if (def->flags & TCG_OPF_COND_BRANCH) { 5476 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5477 } else if (def->flags & TCG_OPF_BB_END) { 5478 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5479 } else { 5480 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5481 assert_carry_dead(s); 5482 /* XXX: permit generic clobber register list ? */ 5483 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5484 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5485 tcg_reg_free(s, i, i_allocated_regs); 5486 } 5487 } 5488 } 5489 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5490 /* sync globals if the op has side effects and might trigger 5491 an exception. */ 5492 sync_globals(s, i_allocated_regs); 5493 } 5494 5495 /* satisfy the output constraints */ 5496 for (k = 0; k < nb_oargs; k++) { 5497 i = args_ct[k].sort_index; 5498 arg = op->args[i]; 5499 arg_ct = &args_ct[i]; 5500 ts = arg_temp(arg); 5501 5502 /* ENV should not be modified. 
*/ 5503 tcg_debug_assert(!temp_readonly(ts)); 5504 5505 switch (arg_ct->pair) { 5506 case 0: /* not paired */ 5507 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5508 reg = new_args[arg_ct->alias_index]; 5509 } else if (arg_ct->newreg) { 5510 reg = tcg_reg_alloc(s, arg_ct->regs, 5511 i_allocated_regs | o_allocated_regs, 5512 output_pref(op, k), ts->indirect_base); 5513 } else { 5514 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5515 output_pref(op, k), ts->indirect_base); 5516 } 5517 break; 5518 5519 case 1: /* first of pair */ 5520 if (arg_ct->oalias) { 5521 reg = new_args[arg_ct->alias_index]; 5522 } else if (arg_ct->newreg) { 5523 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5524 i_allocated_regs | o_allocated_regs, 5525 output_pref(op, k), 5526 ts->indirect_base); 5527 } else { 5528 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5529 output_pref(op, k), 5530 ts->indirect_base); 5531 } 5532 break; 5533 5534 case 2: /* second of pair */ 5535 if (arg_ct->oalias) { 5536 reg = new_args[arg_ct->alias_index]; 5537 } else { 5538 reg = new_args[arg_ct->pair_index] + 1; 5539 } 5540 break; 5541 5542 case 3: /* first of pair, aliasing with a second input */ 5543 tcg_debug_assert(!arg_ct->newreg); 5544 reg = new_args[arg_ct->pair_index] - 1; 5545 break; 5546 5547 default: 5548 g_assert_not_reached(); 5549 } 5550 tcg_regset_set_reg(o_allocated_regs, reg); 5551 set_temp_val_reg(s, ts, reg); 5552 ts->mem_coherent = 0; 5553 new_args[i] = reg; 5554 } 5555 } 5556 5557 /* emit instruction */ 5558 TCGType type = TCGOP_TYPE(op); 5559 switch (op->opc) { 5560 case INDEX_op_addc1o: 5561 tcg_out_set_carry(s); 5562 /* fall through */ 5563 case INDEX_op_add: 5564 case INDEX_op_addcio: 5565 case INDEX_op_addco: 5566 case INDEX_op_and: 5567 case INDEX_op_andc: 5568 case INDEX_op_clz: 5569 case INDEX_op_ctz: 5570 case INDEX_op_divs: 5571 case INDEX_op_divu: 5572 case INDEX_op_eqv: 5573 case INDEX_op_mul: 5574 case INDEX_op_mulsh: 5575 case INDEX_op_muluh: 5576 case INDEX_op_nand: 5577 case INDEX_op_nor: 5578 case INDEX_op_or: 5579 case INDEX_op_orc: 5580 case INDEX_op_rems: 5581 case INDEX_op_remu: 5582 case INDEX_op_rotl: 5583 case INDEX_op_rotr: 5584 case INDEX_op_sar: 5585 case INDEX_op_shl: 5586 case INDEX_op_shr: 5587 case INDEX_op_xor: 5588 { 5589 const TCGOutOpBinary *out = 5590 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5591 5592 /* Constants should never appear in the first source operand. */ 5593 tcg_debug_assert(!const_args[1]); 5594 if (const_args[2]) { 5595 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5596 } else { 5597 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5598 } 5599 } 5600 break; 5601 5602 case INDEX_op_sub: 5603 { 5604 const TCGOutOpSubtract *out = &outop_sub; 5605 5606 /* 5607 * Constants should never appear in the second source operand. 5608 * These are folded to add with negative constant. 
5609 */ 5610 tcg_debug_assert(!const_args[2]); 5611 if (const_args[1]) { 5612 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5613 } else { 5614 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5615 } 5616 } 5617 break; 5618 5619 case INDEX_op_subb1o: 5620 tcg_out_set_borrow(s); 5621 /* fall through */ 5622 case INDEX_op_addci: 5623 case INDEX_op_subbi: 5624 case INDEX_op_subbio: 5625 case INDEX_op_subbo: 5626 { 5627 const TCGOutOpAddSubCarry *out = 5628 container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); 5629 5630 if (const_args[2]) { 5631 if (const_args[1]) { 5632 out->out_rii(s, type, new_args[0], 5633 new_args[1], new_args[2]); 5634 } else { 5635 out->out_rri(s, type, new_args[0], 5636 new_args[1], new_args[2]); 5637 } 5638 } else if (const_args[1]) { 5639 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5640 } else { 5641 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5642 } 5643 } 5644 break; 5645 5646 case INDEX_op_bswap64: 5647 case INDEX_op_ext_i32_i64: 5648 case INDEX_op_extu_i32_i64: 5649 case INDEX_op_extrl_i64_i32: 5650 case INDEX_op_extrh_i64_i32: 5651 assert(TCG_TARGET_REG_BITS == 64); 5652 /* fall through */ 5653 case INDEX_op_ctpop: 5654 case INDEX_op_neg: 5655 case INDEX_op_not: 5656 { 5657 const TCGOutOpUnary *out = 5658 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5659 5660 /* Constants should have been folded. */ 5661 tcg_debug_assert(!const_args[1]); 5662 out->out_rr(s, type, new_args[0], new_args[1]); 5663 } 5664 break; 5665 5666 case INDEX_op_bswap16: 5667 case INDEX_op_bswap32: 5668 { 5669 const TCGOutOpBswap *out = 5670 container_of(all_outop[op->opc], TCGOutOpBswap, base); 5671 5672 tcg_debug_assert(!const_args[1]); 5673 out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); 5674 } 5675 break; 5676 5677 case INDEX_op_deposit: 5678 { 5679 const TCGOutOpDeposit *out = &outop_deposit; 5680 5681 if (const_args[2]) { 5682 tcg_debug_assert(!const_args[1]); 5683 out->out_rri(s, type, new_args[0], new_args[1], 5684 new_args[2], new_args[3], new_args[4]); 5685 } else if (const_args[1]) { 5686 tcg_debug_assert(new_args[1] == 0); 5687 tcg_debug_assert(!const_args[2]); 5688 out->out_rzr(s, type, new_args[0], new_args[2], 5689 new_args[3], new_args[4]); 5690 } else { 5691 out->out_rrr(s, type, new_args[0], new_args[1], 5692 new_args[2], new_args[3], new_args[4]); 5693 } 5694 } 5695 break; 5696 5697 case INDEX_op_divs2: 5698 case INDEX_op_divu2: 5699 { 5700 const TCGOutOpDivRem *out = 5701 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5702 5703 /* Only used by x86 and s390x, which use matching constraints. 
*/ 5704 tcg_debug_assert(new_args[0] == new_args[2]); 5705 tcg_debug_assert(new_args[1] == new_args[3]); 5706 tcg_debug_assert(!const_args[4]); 5707 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5708 } 5709 break; 5710 5711 case INDEX_op_extract: 5712 case INDEX_op_sextract: 5713 { 5714 const TCGOutOpExtract *out = 5715 container_of(all_outop[op->opc], TCGOutOpExtract, base); 5716 5717 tcg_debug_assert(!const_args[1]); 5718 out->out_rr(s, type, new_args[0], new_args[1], 5719 new_args[2], new_args[3]); 5720 } 5721 break; 5722 5723 case INDEX_op_extract2: 5724 { 5725 const TCGOutOpExtract2 *out = &outop_extract2; 5726 5727 tcg_debug_assert(!const_args[1]); 5728 tcg_debug_assert(!const_args[2]); 5729 out->out_rrr(s, type, new_args[0], new_args[1], 5730 new_args[2], new_args[3]); 5731 } 5732 break; 5733 5734 case INDEX_op_muls2: 5735 case INDEX_op_mulu2: 5736 { 5737 const TCGOutOpMul2 *out = 5738 container_of(all_outop[op->opc], TCGOutOpMul2, base); 5739 5740 tcg_debug_assert(!const_args[2]); 5741 tcg_debug_assert(!const_args[3]); 5742 out->out_rrrr(s, type, new_args[0], new_args[1], 5743 new_args[2], new_args[3]); 5744 } 5745 break; 5746 5747 case INDEX_op_brcond: 5748 { 5749 const TCGOutOpBrcond *out = &outop_brcond; 5750 TCGCond cond = new_args[2]; 5751 TCGLabel *label = arg_label(new_args[3]); 5752 5753 tcg_debug_assert(!const_args[0]); 5754 if (const_args[1]) { 5755 out->out_ri(s, type, cond, new_args[0], new_args[1], label); 5756 } else { 5757 out->out_rr(s, type, cond, new_args[0], new_args[1], label); 5758 } 5759 } 5760 break; 5761 5762 case INDEX_op_movcond: 5763 { 5764 const TCGOutOpMovcond *out = &outop_movcond; 5765 TCGCond cond = new_args[5]; 5766 5767 tcg_debug_assert(!const_args[1]); 5768 out->out(s, type, cond, new_args[0], 5769 new_args[1], new_args[2], const_args[2], 5770 new_args[3], const_args[3], 5771 new_args[4], const_args[4]); 5772 } 5773 break; 5774 5775 case INDEX_op_setcond: 5776 case INDEX_op_negsetcond: 5777 { 5778 const TCGOutOpSetcond *out = 5779 container_of(all_outop[op->opc], TCGOutOpSetcond, base); 5780 TCGCond cond = new_args[3]; 5781 5782 tcg_debug_assert(!const_args[1]); 5783 if (const_args[2]) { 5784 out->out_rri(s, type, cond, 5785 new_args[0], new_args[1], new_args[2]); 5786 } else { 5787 out->out_rrr(s, type, cond, 5788 new_args[0], new_args[1], new_args[2]); 5789 } 5790 } 5791 break; 5792 5793 #if TCG_TARGET_REG_BITS == 32 5794 case INDEX_op_brcond2_i32: 5795 { 5796 const TCGOutOpBrcond2 *out = &outop_brcond2; 5797 TCGCond cond = new_args[4]; 5798 TCGLabel *label = arg_label(new_args[5]); 5799 5800 tcg_debug_assert(!const_args[0]); 5801 tcg_debug_assert(!const_args[1]); 5802 out->out(s, cond, new_args[0], new_args[1], 5803 new_args[2], const_args[2], 5804 new_args[3], const_args[3], label); 5805 } 5806 break; 5807 case INDEX_op_setcond2_i32: 5808 { 5809 const TCGOutOpSetcond2 *out = &outop_setcond2; 5810 TCGCond cond = new_args[5]; 5811 5812 tcg_debug_assert(!const_args[1]); 5813 tcg_debug_assert(!const_args[2]); 5814 out->out(s, cond, new_args[0], new_args[1], new_args[2], 5815 new_args[3], const_args[3], new_args[4], const_args[4]); 5816 } 5817 break; 5818 #else 5819 case INDEX_op_brcond2_i32: 5820 case INDEX_op_setcond2_i32: 5821 g_assert_not_reached(); 5822 #endif 5823 5824 default: 5825 if (def->flags & TCG_OPF_VECTOR) { 5826 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5827 TCGOP_VECE(op), new_args, const_args); 5828 } else { 5829 tcg_out_op(s, op->opc, type, new_args, const_args); 5830 } 5831 break; 5832 } 5833 5834 if 
(def->flags & TCG_OPF_CARRY_IN) { 5835 s->carry_live = false; 5836 } 5837 if (def->flags & TCG_OPF_CARRY_OUT) { 5838 s->carry_live = true; 5839 } 5840 5841 /* move the outputs in the correct register if needed */ 5842 for(i = 0; i < nb_oargs; i++) { 5843 ts = arg_temp(op->args[i]); 5844 5845 /* ENV should not be modified. */ 5846 tcg_debug_assert(!temp_readonly(ts)); 5847 5848 if (NEED_SYNC_ARG(i)) { 5849 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5850 } else if (IS_DEAD_ARG(i)) { 5851 temp_dead(s, ts); 5852 } 5853 } 5854 } 5855 5856 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5857 { 5858 const TCGLifeData arg_life = op->life; 5859 TCGTemp *ots, *itsl, *itsh; 5860 TCGType vtype = TCGOP_TYPE(op); 5861 5862 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5863 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5864 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5865 5866 ots = arg_temp(op->args[0]); 5867 itsl = arg_temp(op->args[1]); 5868 itsh = arg_temp(op->args[2]); 5869 5870 /* ENV should not be modified. */ 5871 tcg_debug_assert(!temp_readonly(ots)); 5872 5873 /* Allocate the output register now. */ 5874 if (ots->val_type != TEMP_VAL_REG) { 5875 TCGRegSet allocated_regs = s->reserved_regs; 5876 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5877 TCGReg oreg; 5878 5879 /* Make sure to not spill the input registers. */ 5880 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5881 tcg_regset_set_reg(allocated_regs, itsl->reg); 5882 } 5883 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5884 tcg_regset_set_reg(allocated_regs, itsh->reg); 5885 } 5886 5887 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5888 output_pref(op, 0), ots->indirect_base); 5889 set_temp_val_reg(s, ots, oreg); 5890 } 5891 5892 /* Promote dup2 of immediates to dupi_vec. */ 5893 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5894 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5895 MemOp vece = MO_64; 5896 5897 if (val == dup_const(MO_8, val)) { 5898 vece = MO_8; 5899 } else if (val == dup_const(MO_16, val)) { 5900 vece = MO_16; 5901 } else if (val == dup_const(MO_32, val)) { 5902 vece = MO_32; 5903 } 5904 5905 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5906 goto done; 5907 } 5908 5909 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5910 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5911 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5912 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5913 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5914 5915 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5916 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5917 5918 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5919 its->mem_base->reg, its->mem_offset)) { 5920 goto done; 5921 } 5922 } 5923 5924 /* Fall back to generic expansion. */ 5925 return false; 5926 5927 done: 5928 ots->mem_coherent = 0; 5929 if (IS_DEAD_ARG(1)) { 5930 temp_dead(s, itsl); 5931 } 5932 if (IS_DEAD_ARG(2)) { 5933 temp_dead(s, itsh); 5934 } 5935 if (NEED_SYNC_ARG(0)) { 5936 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5937 } else if (IS_DEAD_ARG(0)) { 5938 temp_dead(s, ots); 5939 } 5940 return true; 5941 } 5942 5943 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5944 TCGRegSet allocated_regs) 5945 { 5946 if (ts->val_type == TEMP_VAL_REG) { 5947 if (ts->reg != reg) { 5948 tcg_reg_free(s, reg, allocated_regs); 5949 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5950 /* 5951 * Cross register class move not supported. 
Sync the 5952 * temp back to its slot and load from there. 5953 */ 5954 temp_sync(s, ts, allocated_regs, 0, 0); 5955 tcg_out_ld(s, ts->type, reg, 5956 ts->mem_base->reg, ts->mem_offset); 5957 } 5958 } 5959 } else { 5960 TCGRegSet arg_set = 0; 5961 5962 tcg_reg_free(s, reg, allocated_regs); 5963 tcg_regset_set_reg(arg_set, reg); 5964 temp_load(s, ts, arg_set, allocated_regs, 0); 5965 } 5966 } 5967 5968 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5969 TCGRegSet allocated_regs) 5970 { 5971 /* 5972 * When the destination is on the stack, load up the temp and store. 5973 * If there are many call-saved registers, the temp might live to 5974 * see another use; otherwise it'll be discarded. 5975 */ 5976 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5977 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5978 arg_slot_stk_ofs(arg_slot)); 5979 } 5980 5981 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5982 TCGTemp *ts, TCGRegSet *allocated_regs) 5983 { 5984 if (arg_slot_reg_p(l->arg_slot)) { 5985 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5986 load_arg_reg(s, reg, ts, *allocated_regs); 5987 tcg_regset_set_reg(*allocated_regs, reg); 5988 } else { 5989 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5990 } 5991 } 5992 5993 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5994 intptr_t ref_off, TCGRegSet *allocated_regs) 5995 { 5996 TCGReg reg; 5997 5998 if (arg_slot_reg_p(arg_slot)) { 5999 reg = tcg_target_call_iarg_regs[arg_slot]; 6000 tcg_reg_free(s, reg, *allocated_regs); 6001 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 6002 tcg_regset_set_reg(*allocated_regs, reg); 6003 } else { 6004 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 6005 *allocated_regs, 0, false); 6006 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 6007 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 6008 arg_slot_stk_ofs(arg_slot)); 6009 } 6010 } 6011 6012 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 6013 { 6014 const int nb_oargs = TCGOP_CALLO(op); 6015 const int nb_iargs = TCGOP_CALLI(op); 6016 const TCGLifeData arg_life = op->life; 6017 const TCGHelperInfo *info = tcg_call_info(op); 6018 TCGRegSet allocated_regs = s->reserved_regs; 6019 int i; 6020 6021 /* 6022 * Move inputs into place in reverse order, 6023 * so that we place stacked arguments first. 6024 */ 6025 for (i = nb_iargs - 1; i >= 0; --i) { 6026 const TCGCallArgumentLoc *loc = &info->in[i]; 6027 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 6028 6029 switch (loc->kind) { 6030 case TCG_CALL_ARG_NORMAL: 6031 case TCG_CALL_ARG_EXTEND_U: 6032 case TCG_CALL_ARG_EXTEND_S: 6033 load_arg_normal(s, loc, ts, &allocated_regs); 6034 break; 6035 case TCG_CALL_ARG_BY_REF: 6036 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6037 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 6038 arg_slot_stk_ofs(loc->ref_slot), 6039 &allocated_regs); 6040 break; 6041 case TCG_CALL_ARG_BY_REF_N: 6042 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6043 break; 6044 default: 6045 g_assert_not_reached(); 6046 } 6047 } 6048 6049 /* Mark dead temporaries and free the associated registers. */ 6050 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 6051 if (IS_DEAD_ARG(i)) { 6052 temp_dead(s, arg_temp(op->args[i])); 6053 } 6054 } 6055 6056 /* Clobber call registers. 
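* Any temp still resident in one of tcg_target_call_clobber_regs
* is spilled as needed and the register freed, since the callee
* is free to overwrite those registers.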
*/ 6057 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 6058 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 6059 tcg_reg_free(s, i, allocated_regs); 6060 } 6061 } 6062 6063 /* 6064 * Save globals if they might be written by the helper, 6065 * sync them if they might be read. 6066 */ 6067 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 6068 /* Nothing to do */ 6069 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 6070 sync_globals(s, allocated_regs); 6071 } else { 6072 save_globals(s, allocated_regs); 6073 } 6074 6075 /* 6076 * If the ABI passes a pointer to the returned struct as the first 6077 * argument, load that now. Pass a pointer to the output home slot. 6078 */ 6079 if (info->out_kind == TCG_CALL_RET_BY_REF) { 6080 TCGTemp *ts = arg_temp(op->args[0]); 6081 6082 if (!ts->mem_allocated) { 6083 temp_allocate_frame(s, ts); 6084 } 6085 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 6086 } 6087 6088 tcg_out_call(s, tcg_call_func(op), info); 6089 6090 /* Assign output registers and emit moves if needed. */ 6091 switch (info->out_kind) { 6092 case TCG_CALL_RET_NORMAL: 6093 for (i = 0; i < nb_oargs; i++) { 6094 TCGTemp *ts = arg_temp(op->args[i]); 6095 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 6096 6097 /* ENV should not be modified. */ 6098 tcg_debug_assert(!temp_readonly(ts)); 6099 6100 set_temp_val_reg(s, ts, reg); 6101 ts->mem_coherent = 0; 6102 } 6103 break; 6104 6105 case TCG_CALL_RET_BY_VEC: 6106 { 6107 TCGTemp *ts = arg_temp(op->args[0]); 6108 6109 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 6110 tcg_debug_assert(ts->temp_subindex == 0); 6111 if (!ts->mem_allocated) { 6112 temp_allocate_frame(s, ts); 6113 } 6114 tcg_out_st(s, TCG_TYPE_V128, 6115 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6116 ts->mem_base->reg, ts->mem_offset); 6117 } 6118 /* fall through to mark all parts in memory */ 6119 6120 case TCG_CALL_RET_BY_REF: 6121 /* The callee has performed a write through the reference. */ 6122 for (i = 0; i < nb_oargs; i++) { 6123 TCGTemp *ts = arg_temp(op->args[i]); 6124 ts->val_type = TEMP_VAL_MEM; 6125 } 6126 break; 6127 6128 default: 6129 g_assert_not_reached(); 6130 } 6131 6132 /* Flush or discard output registers as needed. */ 6133 for (i = 0; i < nb_oargs; i++) { 6134 TCGTemp *ts = arg_temp(op->args[i]); 6135 if (NEED_SYNC_ARG(i)) { 6136 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 6137 } else if (IS_DEAD_ARG(i)) { 6138 temp_dead(s, ts); 6139 } 6140 } 6141 } 6142 6143 /** 6144 * atom_and_align_for_opc: 6145 * @s: tcg context 6146 * @opc: memory operation code 6147 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 6148 * @allow_two_ops: true if we are prepared to issue two operations 6149 * 6150 * Return the alignment and atomicity to use for the inline fast path 6151 * for the given memory operation. The alignment may be larger than 6152 * that specified in @opc, and the correct alignment will be diagnosed 6153 * by the slow path helper. 6154 * 6155 * If @allow_two_ops, the host is prepared to test for 2x alignment, 6156 * and issue two loads or stores for subalignment. 6157 */ 6158 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 6159 MemOp host_atom, bool allow_two_ops) 6160 { 6161 MemOp align = memop_alignment_bits(opc); 6162 MemOp size = opc & MO_SIZE; 6163 MemOp half = size ? size - 1 : 0; 6164 MemOp atom = opc & MO_ATOM_MASK; 6165 MemOp atmax; 6166 6167 switch (atom) { 6168 case MO_ATOM_NONE: 6169 /* The operation requires no specific atomicity. 
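* For example, an 8-byte access with MO_ATOM_NONE may legitimately
* be emitted as individual byte operations, so the fast path only
* needs to guarantee MO_8 atomicity.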
*/ 6170 atmax = MO_8; 6171 break; 6172 6173 case MO_ATOM_IFALIGN: 6174 atmax = size; 6175 break; 6176 6177 case MO_ATOM_IFALIGN_PAIR: 6178 atmax = half; 6179 break; 6180 6181 case MO_ATOM_WITHIN16: 6182 atmax = size; 6183 if (size == MO_128) { 6184 /* Misalignment implies !within16, and therefore no atomicity. */ 6185 } else if (host_atom != MO_ATOM_WITHIN16) { 6186 /* The host does not implement within16, so require alignment. */ 6187 align = MAX(align, size); 6188 } 6189 break; 6190 6191 case MO_ATOM_WITHIN16_PAIR: 6192 atmax = size; 6193 /* 6194 * Misalignment implies !within16, and therefore half atomicity. 6195 * Any host prepared for two operations can implement this with 6196 * half alignment. 6197 */ 6198 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 6199 align = MAX(align, half); 6200 } 6201 break; 6202 6203 case MO_ATOM_SUBALIGN: 6204 atmax = size; 6205 if (host_atom != MO_ATOM_SUBALIGN) { 6206 /* If unaligned but not odd, there are subobjects up to half. */ 6207 if (allow_two_ops) { 6208 align = MAX(align, half); 6209 } else { 6210 align = MAX(align, size); 6211 } 6212 } 6213 break; 6214 6215 default: 6216 g_assert_not_reached(); 6217 } 6218 6219 return (TCGAtomAlign){ .atom = atmax, .align = align }; 6220 } 6221 6222 /* 6223 * Similarly for qemu_ld/st slow path helpers. 6224 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 6225 * using only the provided backend tcg_out_* functions. 6226 */ 6227 6228 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 6229 { 6230 int ofs = arg_slot_stk_ofs(slot); 6231 6232 /* 6233 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 6234 * require extension to uint64_t, adjust the address for uint32_t. 6235 */ 6236 if (HOST_BIG_ENDIAN && 6237 TCG_TARGET_REG_BITS == 64 && 6238 type == TCG_TYPE_I32) { 6239 ofs += 4; 6240 } 6241 return ofs; 6242 } 6243 6244 static void tcg_out_helper_load_slots(TCGContext *s, 6245 unsigned nmov, TCGMovExtend *mov, 6246 const TCGLdstHelperParam *parm) 6247 { 6248 unsigned i; 6249 TCGReg dst3; 6250 6251 /* 6252 * Start from the end, storing to the stack first. 6253 * This frees those registers, so we need not consider overlap. 6254 */ 6255 for (i = nmov; i-- > 0; ) { 6256 unsigned slot = mov[i].dst; 6257 6258 if (arg_slot_reg_p(slot)) { 6259 goto found_reg; 6260 } 6261 6262 TCGReg src = mov[i].src; 6263 TCGType dst_type = mov[i].dst_type; 6264 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6265 6266 /* The argument is going onto the stack; extend into scratch. */ 6267 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 6268 tcg_debug_assert(parm->ntmp != 0); 6269 mov[i].dst = src = parm->tmp[0]; 6270 tcg_out_movext1(s, &mov[i]); 6271 } 6272 6273 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 6274 tcg_out_helper_stk_ofs(dst_type, slot)); 6275 } 6276 return; 6277 6278 found_reg: 6279 /* 6280 * The remaining arguments are in registers. 6281 * Convert slot numbers to argument registers. 6282 */ 6283 nmov = i + 1; 6284 for (i = 0; i < nmov; ++i) { 6285 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 6286 } 6287 6288 switch (nmov) { 6289 case 4: 6290 /* The backend must have provided enough temps for the worst case. */ 6291 tcg_debug_assert(parm->ntmp >= 2); 6292 6293 dst3 = mov[3].dst; 6294 for (unsigned j = 0; j < 3; ++j) { 6295 if (dst3 == mov[j].src) { 6296 /* 6297 * Conflict. Copy the source to a temporary, perform the 6298 * remaining moves, then the extension from our scratch 6299 * on the way out. 
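* For example, if the register chosen for mov[3].dst is also
* mov[1].src, emitting mov[3] first (the usual order below) would
* clobber mov[1]'s source; so mov[3].src is saved to a scratch,
* mov[0..2] are emitted, and mov[3] is completed from the scratch.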
6300 */
6301 TCGReg scratch = parm->tmp[1];
6302
6303 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6304 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6305 tcg_out_movext1_new_src(s, &mov[3], scratch);
6306 return;
6307 }
6308 }
6309
6310 /* No conflicts: perform this move and continue. */
6311 tcg_out_movext1(s, &mov[3]);
6312 /* fall through */
6313
6314 case 3:
6315 tcg_out_movext3(s, mov, mov + 1, mov + 2,
6316 parm->ntmp ? parm->tmp[0] : -1);
6317 break;
6318 case 2:
6319 tcg_out_movext2(s, mov, mov + 1,
6320 parm->ntmp ? parm->tmp[0] : -1);
6321 break;
6322 case 1:
6323 tcg_out_movext1(s, mov);
6324 break;
6325 default:
6326 g_assert_not_reached();
6327 }
6328 }
6329
6330 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6331 TCGType type, tcg_target_long imm,
6332 const TCGLdstHelperParam *parm)
6333 {
6334 if (arg_slot_reg_p(slot)) {
6335 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6336 } else {
6337 int ofs = tcg_out_helper_stk_ofs(type, slot);
6338 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6339 tcg_debug_assert(parm->ntmp != 0);
6340 tcg_out_movi(s, type, parm->tmp[0], imm);
6341 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6342 }
6343 }
6344 }
6345
6346 static void tcg_out_helper_load_common_args(TCGContext *s,
6347 const TCGLabelQemuLdst *ldst,
6348 const TCGLdstHelperParam *parm,
6349 const TCGHelperInfo *info,
6350 unsigned next_arg)
6351 {
6352 TCGMovExtend ptr_mov = {
6353 .dst_type = TCG_TYPE_PTR,
6354 .src_type = TCG_TYPE_PTR,
6355 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6356 };
6357 const TCGCallArgumentLoc *loc = &info->in[0];
6358 TCGType type;
6359 unsigned slot;
6360 tcg_target_ulong imm;
6361
6362 /*
6363 * Handle env, which is always first.
6364 */
6365 ptr_mov.dst = loc->arg_slot;
6366 ptr_mov.src = TCG_AREG0;
6367 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6368
6369 /*
6370 * Handle oi.
6371 */
6372 imm = ldst->oi;
6373 loc = &info->in[next_arg];
6374 type = TCG_TYPE_I32;
6375 switch (loc->kind) {
6376 case TCG_CALL_ARG_NORMAL:
6377 break;
6378 case TCG_CALL_ARG_EXTEND_U:
6379 case TCG_CALL_ARG_EXTEND_S:
6380 /* No extension required for MemOpIdx. */
6381 tcg_debug_assert(imm <= INT32_MAX);
6382 type = TCG_TYPE_REG;
6383 break;
6384 default:
6385 g_assert_not_reached();
6386 }
6387 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6388 next_arg++;
6389
6390 /*
6391 * Handle ra.
6392 */
6393 loc = &info->in[next_arg];
6394 slot = loc->arg_slot;
6395 if (parm->ra_gen) {
6396 int arg_reg = -1;
6397 TCGReg ra_reg;
6398
6399 if (arg_slot_reg_p(slot)) {
6400 arg_reg = tcg_target_call_iarg_regs[slot];
6401 }
6402 ra_reg = parm->ra_gen(s, ldst, arg_reg);
6403
6404 ptr_mov.dst = slot;
6405 ptr_mov.src = ra_reg;
6406 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6407 } else {
6408 imm = (uintptr_t)ldst->raddr;
6409 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6410 }
6411 }
6412
6413 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6414 const TCGCallArgumentLoc *loc,
6415 TCGType dst_type, TCGType src_type,
6416 TCGReg lo, TCGReg hi)
6417 {
6418 MemOp reg_mo;
6419
6420 if (dst_type <= TCG_TYPE_REG) {
6421 MemOp src_ext;
6422
6423 switch (loc->kind) {
6424 case TCG_CALL_ARG_NORMAL:
6425 src_ext = src_type == TCG_TYPE_I32 ?
MO_32 : MO_64; 6426 break; 6427 case TCG_CALL_ARG_EXTEND_U: 6428 dst_type = TCG_TYPE_REG; 6429 src_ext = MO_UL; 6430 break; 6431 case TCG_CALL_ARG_EXTEND_S: 6432 dst_type = TCG_TYPE_REG; 6433 src_ext = MO_SL; 6434 break; 6435 default: 6436 g_assert_not_reached(); 6437 } 6438 6439 mov[0].dst = loc->arg_slot; 6440 mov[0].dst_type = dst_type; 6441 mov[0].src = lo; 6442 mov[0].src_type = src_type; 6443 mov[0].src_ext = src_ext; 6444 return 1; 6445 } 6446 6447 if (TCG_TARGET_REG_BITS == 32) { 6448 assert(dst_type == TCG_TYPE_I64); 6449 reg_mo = MO_32; 6450 } else { 6451 assert(dst_type == TCG_TYPE_I128); 6452 reg_mo = MO_64; 6453 } 6454 6455 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6456 mov[0].src = lo; 6457 mov[0].dst_type = TCG_TYPE_REG; 6458 mov[0].src_type = TCG_TYPE_REG; 6459 mov[0].src_ext = reg_mo; 6460 6461 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6462 mov[1].src = hi; 6463 mov[1].dst_type = TCG_TYPE_REG; 6464 mov[1].src_type = TCG_TYPE_REG; 6465 mov[1].src_ext = reg_mo; 6466 6467 return 2; 6468 } 6469 6470 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6471 const TCGLdstHelperParam *parm) 6472 { 6473 const TCGHelperInfo *info; 6474 const TCGCallArgumentLoc *loc; 6475 TCGMovExtend mov[2]; 6476 unsigned next_arg, nmov; 6477 MemOp mop = get_memop(ldst->oi); 6478 6479 switch (mop & MO_SIZE) { 6480 case MO_8: 6481 case MO_16: 6482 case MO_32: 6483 info = &info_helper_ld32_mmu; 6484 break; 6485 case MO_64: 6486 info = &info_helper_ld64_mmu; 6487 break; 6488 case MO_128: 6489 info = &info_helper_ld128_mmu; 6490 break; 6491 default: 6492 g_assert_not_reached(); 6493 } 6494 6495 /* Defer env argument. */ 6496 next_arg = 1; 6497 6498 loc = &info->in[next_arg]; 6499 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6500 /* 6501 * 32-bit host with 32-bit guest: zero-extend the guest address 6502 * to 64-bits for the helper by storing the low part, then 6503 * load a zero for the high part. 6504 */ 6505 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6506 TCG_TYPE_I32, TCG_TYPE_I32, 6507 ldst->addr_reg, -1); 6508 tcg_out_helper_load_slots(s, 1, mov, parm); 6509 6510 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6511 TCG_TYPE_I32, 0, parm); 6512 next_arg += 2; 6513 } else { 6514 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6515 ldst->addr_reg, -1); 6516 tcg_out_helper_load_slots(s, nmov, mov, parm); 6517 next_arg += nmov; 6518 } 6519 6520 switch (info->out_kind) { 6521 case TCG_CALL_RET_NORMAL: 6522 case TCG_CALL_RET_BY_VEC: 6523 break; 6524 case TCG_CALL_RET_BY_REF: 6525 /* 6526 * The return reference is in the first argument slot. 6527 * We need memory in which to return: re-use the top of stack. 
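* Schematically (register names illustrative), for a 16-byte load
* on such a host the slow path amounts to roughly:
*   addi  arg0, sp, slot0            <- this block
*   call  <ld128 helper>
*   load  datalo, [sp + slot0]       <- tcg_out_ld_helper_ret below
*   load  datahi, [sp + slot0 + 8]
* where slot0 is TCG_TARGET_CALL_STACK_OFFSET and the low/high
* order depends on HOST_BIG_ENDIAN.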
6528 */ 6529 { 6530 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6531 6532 if (arg_slot_reg_p(0)) { 6533 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6534 TCG_REG_CALL_STACK, ofs_slot0); 6535 } else { 6536 tcg_debug_assert(parm->ntmp != 0); 6537 tcg_out_addi_ptr(s, parm->tmp[0], 6538 TCG_REG_CALL_STACK, ofs_slot0); 6539 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6540 TCG_REG_CALL_STACK, ofs_slot0); 6541 } 6542 } 6543 break; 6544 default: 6545 g_assert_not_reached(); 6546 } 6547 6548 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6549 } 6550 6551 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6552 bool load_sign, 6553 const TCGLdstHelperParam *parm) 6554 { 6555 MemOp mop = get_memop(ldst->oi); 6556 TCGMovExtend mov[2]; 6557 int ofs_slot0; 6558 6559 switch (ldst->type) { 6560 case TCG_TYPE_I64: 6561 if (TCG_TARGET_REG_BITS == 32) { 6562 break; 6563 } 6564 /* fall through */ 6565 6566 case TCG_TYPE_I32: 6567 mov[0].dst = ldst->datalo_reg; 6568 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6569 mov[0].dst_type = ldst->type; 6570 mov[0].src_type = TCG_TYPE_REG; 6571 6572 /* 6573 * If load_sign, then we allowed the helper to perform the 6574 * appropriate sign extension to tcg_target_ulong, and all 6575 * we need now is a plain move. 6576 * 6577 * If they do not, then we expect the relevant extension 6578 * instruction to be no more expensive than a move, and 6579 * we thus save the icache etc by only using one of two 6580 * helper functions. 6581 */ 6582 if (load_sign || !(mop & MO_SIGN)) { 6583 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6584 mov[0].src_ext = MO_32; 6585 } else { 6586 mov[0].src_ext = MO_64; 6587 } 6588 } else { 6589 mov[0].src_ext = mop & MO_SSIZE; 6590 } 6591 tcg_out_movext1(s, mov); 6592 return; 6593 6594 case TCG_TYPE_I128: 6595 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6596 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6597 switch (TCG_TARGET_CALL_RET_I128) { 6598 case TCG_CALL_RET_NORMAL: 6599 break; 6600 case TCG_CALL_RET_BY_VEC: 6601 tcg_out_st(s, TCG_TYPE_V128, 6602 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6603 TCG_REG_CALL_STACK, ofs_slot0); 6604 /* fall through */ 6605 case TCG_CALL_RET_BY_REF: 6606 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6607 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6608 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6609 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6610 return; 6611 default: 6612 g_assert_not_reached(); 6613 } 6614 break; 6615 6616 default: 6617 g_assert_not_reached(); 6618 } 6619 6620 mov[0].dst = ldst->datalo_reg; 6621 mov[0].src = 6622 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6623 mov[0].dst_type = TCG_TYPE_REG; 6624 mov[0].src_type = TCG_TYPE_REG; 6625 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6626 6627 mov[1].dst = ldst->datahi_reg; 6628 mov[1].src = 6629 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6630 mov[1].dst_type = TCG_TYPE_REG; 6631 mov[1].src_type = TCG_TYPE_REG; 6632 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6633 6634 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6635 } 6636 6637 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6638 const TCGLdstHelperParam *parm) 6639 { 6640 const TCGHelperInfo *info; 6641 const TCGCallArgumentLoc *loc; 6642 TCGMovExtend mov[4]; 6643 TCGType data_type; 6644 unsigned next_arg, nmov, n; 6645 MemOp mop = get_memop(ldst->oi); 6646 6647 switch (mop & MO_SIZE) { 6648 case MO_8: 6649 case MO_16: 6650 case MO_32: 6651 info = &info_helper_st32_mmu; 6652 data_type = TCG_TYPE_I32; 6653 break; 6654 case MO_64: 6655 info = &info_helper_st64_mmu; 6656 data_type = TCG_TYPE_I64; 6657 break; 6658 case MO_128: 6659 info = &info_helper_st128_mmu; 6660 data_type = TCG_TYPE_I128; 6661 break; 6662 default: 6663 g_assert_not_reached(); 6664 } 6665 6666 /* Defer env argument. */ 6667 next_arg = 1; 6668 nmov = 0; 6669 6670 /* Handle addr argument. */ 6671 loc = &info->in[next_arg]; 6672 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6673 if (TCG_TARGET_REG_BITS == 32) { 6674 /* 6675 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6676 * to 64-bits for the helper by storing the low part. Later, 6677 * after we have processed the register inputs, we will load a 6678 * zero for the high part. 6679 */ 6680 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6681 TCG_TYPE_I32, TCG_TYPE_I32, 6682 ldst->addr_reg, -1); 6683 next_arg += 2; 6684 nmov += 1; 6685 } else { 6686 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6687 ldst->addr_reg, -1); 6688 next_arg += n; 6689 nmov += n; 6690 } 6691 6692 /* Handle data argument. */ 6693 loc = &info->in[next_arg]; 6694 switch (loc->kind) { 6695 case TCG_CALL_ARG_NORMAL: 6696 case TCG_CALL_ARG_EXTEND_U: 6697 case TCG_CALL_ARG_EXTEND_S: 6698 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6699 ldst->datalo_reg, ldst->datahi_reg); 6700 next_arg += n; 6701 nmov += n; 6702 tcg_out_helper_load_slots(s, nmov, mov, parm); 6703 break; 6704 6705 case TCG_CALL_ARG_BY_REF: 6706 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6707 tcg_debug_assert(data_type == TCG_TYPE_I128); 6708 tcg_out_st(s, TCG_TYPE_I64, 6709 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6710 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6711 tcg_out_st(s, TCG_TYPE_I64, 6712 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6713 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6714 6715 tcg_out_helper_load_slots(s, nmov, mov, parm); 6716 6717 if (arg_slot_reg_p(loc->arg_slot)) { 6718 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6719 TCG_REG_CALL_STACK, 6720 arg_slot_stk_ofs(loc->ref_slot)); 6721 } else { 6722 tcg_debug_assert(parm->ntmp != 0); 6723 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6724 arg_slot_stk_ofs(loc->ref_slot)); 6725 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6726 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6727 } 6728 next_arg += 2; 6729 break; 6730 6731 default: 6732 g_assert_not_reached(); 6733 } 6734 6735 if (TCG_TARGET_REG_BITS == 32) { 6736 /* Zero extend the address by loading a zero for the high part. 
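* On a little-endian host the guest address went into the slot
* described by in[1] above, so the constant zero goes into in[2];
* on a big-endian host the two slots are reversed.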
*/ 6737 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6738 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6739 } 6740 6741 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6742 } 6743 6744 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6745 { 6746 int i, start_words, num_insns; 6747 TCGOp *op; 6748 6749 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6750 && qemu_log_in_addr_range(pc_start))) { 6751 FILE *logfile = qemu_log_trylock(); 6752 if (logfile) { 6753 fprintf(logfile, "OP:\n"); 6754 tcg_dump_ops(s, logfile, false); 6755 fprintf(logfile, "\n"); 6756 qemu_log_unlock(logfile); 6757 } 6758 } 6759 6760 #ifdef CONFIG_DEBUG_TCG 6761 /* Ensure all labels referenced have been emitted. */ 6762 { 6763 TCGLabel *l; 6764 bool error = false; 6765 6766 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6767 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6768 qemu_log_mask(CPU_LOG_TB_OP, 6769 "$L%d referenced but not present.\n", l->id); 6770 error = true; 6771 } 6772 } 6773 assert(!error); 6774 } 6775 #endif 6776 6777 /* Do not reuse any EBB that may be allocated within the TB. */ 6778 tcg_temp_ebb_reset_freed(s); 6779 6780 tcg_optimize(s); 6781 6782 reachable_code_pass(s); 6783 liveness_pass_0(s); 6784 liveness_pass_1(s); 6785 6786 if (s->nb_indirects > 0) { 6787 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6788 && qemu_log_in_addr_range(pc_start))) { 6789 FILE *logfile = qemu_log_trylock(); 6790 if (logfile) { 6791 fprintf(logfile, "OP before indirect lowering:\n"); 6792 tcg_dump_ops(s, logfile, false); 6793 fprintf(logfile, "\n"); 6794 qemu_log_unlock(logfile); 6795 } 6796 } 6797 6798 /* Replace indirect temps with direct temps. */ 6799 if (liveness_pass_2(s)) { 6800 /* If changes were made, re-run liveness. */ 6801 liveness_pass_1(s); 6802 } 6803 } 6804 6805 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6806 && qemu_log_in_addr_range(pc_start))) { 6807 FILE *logfile = qemu_log_trylock(); 6808 if (logfile) { 6809 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6810 tcg_dump_ops(s, logfile, true); 6811 fprintf(logfile, "\n"); 6812 qemu_log_unlock(logfile); 6813 } 6814 } 6815 6816 /* Initialize goto_tb jump offsets. */ 6817 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6818 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6819 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6820 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6821 6822 tcg_reg_alloc_start(s); 6823 6824 /* 6825 * Reset the buffer pointers when restarting after overflow. 6826 * TODO: Move this into translate-all.c with the rest of the 6827 * buffer management. Having only this done here is confusing. 6828 */ 6829 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6830 s->code_ptr = s->code_buf; 6831 s->data_gen_ptr = NULL; 6832 6833 QSIMPLEQ_INIT(&s->ldst_labels); 6834 s->pool_labels = NULL; 6835 6836 start_words = s->insn_start_words; 6837 s->gen_insn_data = 6838 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6839 6840 tcg_out_tb_start(s); 6841 6842 num_insns = -1; 6843 s->carry_live = false; 6844 QTAILQ_FOREACH(op, &s->ops, link) { 6845 TCGOpcode opc = op->opc; 6846 6847 switch (opc) { 6848 case INDEX_op_extrl_i64_i32: 6849 assert(TCG_TARGET_REG_BITS == 64); 6850 /* 6851 * If TCG_TYPE_I32 is represented in some canonical form, 6852 * e.g. zero or sign-extended, then emit as a unary op. 6853 * Otherwise we can treat this as a plain move. 
6854 * If the output dies, treat this as a plain move, because 6855 * this will be implemented with a store. 6856 */ 6857 if (TCG_TARGET_HAS_extr_i64_i32) { 6858 TCGLifeData arg_life = op->life; 6859 if (!IS_DEAD_ARG(0)) { 6860 goto do_default; 6861 } 6862 } 6863 /* fall through */ 6864 case INDEX_op_mov: 6865 case INDEX_op_mov_vec: 6866 tcg_reg_alloc_mov(s, op); 6867 break; 6868 case INDEX_op_dup_vec: 6869 tcg_reg_alloc_dup(s, op); 6870 break; 6871 case INDEX_op_insn_start: 6872 assert_carry_dead(s); 6873 if (num_insns >= 0) { 6874 size_t off = tcg_current_code_size(s); 6875 s->gen_insn_end_off[num_insns] = off; 6876 /* Assert that we do not overflow our stored offset. */ 6877 assert(s->gen_insn_end_off[num_insns] == off); 6878 } 6879 num_insns++; 6880 for (i = 0; i < start_words; ++i) { 6881 s->gen_insn_data[num_insns * start_words + i] = 6882 tcg_get_insn_start_param(op, i); 6883 } 6884 break; 6885 case INDEX_op_discard: 6886 temp_dead(s, arg_temp(op->args[0])); 6887 break; 6888 case INDEX_op_set_label: 6889 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6890 tcg_out_label(s, arg_label(op->args[0])); 6891 break; 6892 case INDEX_op_call: 6893 assert_carry_dead(s); 6894 tcg_reg_alloc_call(s, op); 6895 break; 6896 case INDEX_op_exit_tb: 6897 tcg_out_exit_tb(s, op->args[0]); 6898 break; 6899 case INDEX_op_goto_tb: 6900 tcg_out_goto_tb(s, op->args[0]); 6901 break; 6902 case INDEX_op_dup2_vec: 6903 if (tcg_reg_alloc_dup2(s, op)) { 6904 break; 6905 } 6906 /* fall through */ 6907 default: 6908 do_default: 6909 /* Sanity check that we've not introduced any unhandled opcodes. */ 6910 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6911 TCGOP_FLAGS(op))); 6912 /* Note: in order to speed up the code, it would be much 6913 faster to have specialized register allocator functions for 6914 some common argument patterns */ 6915 tcg_reg_alloc_op(s, op); 6916 break; 6917 } 6918 /* Test for (pending) buffer overflow. The assumption is that any 6919 one operation beginning below the high water mark cannot overrun 6920 the buffer completely. Thus we can test for overflow after 6921 generating code without having to check during generation. */ 6922 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6923 return -1; 6924 } 6925 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6926 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6927 return -2; 6928 } 6929 } 6930 assert_carry_dead(s); 6931 6932 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6933 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6934 6935 /* Generate TB finalization at the end of block */ 6936 i = tcg_out_ldst_finalize(s); 6937 if (i < 0) { 6938 return i; 6939 } 6940 i = tcg_out_pool_finalize(s); 6941 if (i < 0) { 6942 return i; 6943 } 6944 if (!tcg_resolve_relocs(s)) { 6945 return -2; 6946 } 6947 6948 #ifndef CONFIG_TCG_INTERPRETER 6949 /* flush instruction cache */ 6950 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6951 (uintptr_t)s->code_buf, 6952 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6953 #endif 6954 6955 return tcg_current_code_size(s); 6956 } 6957 6958 #ifdef ELF_HOST_MACHINE 6959 /* In order to use this feature, the backend needs to do three things: 6960 6961 (1) Define ELF_HOST_MACHINE to indicate both what value to 6962 put into the ELF image and to indicate support for the feature. 6963 6964 (2) Define tcg_register_jit. 
This should create a buffer containing 6965 the contents of a .debug_frame section that describes the post- 6966 prologue unwind info for the tcg machine. 6967 6968 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6969 */ 6970 6971 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 6972 typedef enum { 6973 JIT_NOACTION = 0, 6974 JIT_REGISTER_FN, 6975 JIT_UNREGISTER_FN 6976 } jit_actions_t; 6977 6978 struct jit_code_entry { 6979 struct jit_code_entry *next_entry; 6980 struct jit_code_entry *prev_entry; 6981 const void *symfile_addr; 6982 uint64_t symfile_size; 6983 }; 6984 6985 struct jit_descriptor { 6986 uint32_t version; 6987 uint32_t action_flag; 6988 struct jit_code_entry *relevant_entry; 6989 struct jit_code_entry *first_entry; 6990 }; 6991 6992 void __jit_debug_register_code(void) __attribute__((noinline)); 6993 void __jit_debug_register_code(void) 6994 { 6995 asm(""); 6996 } 6997 6998 /* Must statically initialize the version, because GDB may check 6999 the version before we can set it. */ 7000 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 7001 7002 /* End GDB interface. */ 7003 7004 static int find_string(const char *strtab, const char *str) 7005 { 7006 const char *p = strtab + 1; 7007 7008 while (1) { 7009 if (strcmp(p, str) == 0) { 7010 return p - strtab; 7011 } 7012 p += strlen(p) + 1; 7013 } 7014 } 7015 7016 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 7017 const void *debug_frame, 7018 size_t debug_frame_size) 7019 { 7020 struct __attribute__((packed)) DebugInfo { 7021 uint32_t len; 7022 uint16_t version; 7023 uint32_t abbrev; 7024 uint8_t ptr_size; 7025 uint8_t cu_die; 7026 uint16_t cu_lang; 7027 uintptr_t cu_low_pc; 7028 uintptr_t cu_high_pc; 7029 uint8_t fn_die; 7030 char fn_name[16]; 7031 uintptr_t fn_low_pc; 7032 uintptr_t fn_high_pc; 7033 uint8_t cu_eoc; 7034 }; 7035 7036 struct ElfImage { 7037 ElfW(Ehdr) ehdr; 7038 ElfW(Phdr) phdr; 7039 ElfW(Shdr) shdr[7]; 7040 ElfW(Sym) sym[2]; 7041 struct DebugInfo di; 7042 uint8_t da[24]; 7043 char str[80]; 7044 }; 7045 7046 struct ElfImage *img; 7047 7048 static const struct ElfImage img_template = { 7049 .ehdr = { 7050 .e_ident[EI_MAG0] = ELFMAG0, 7051 .e_ident[EI_MAG1] = ELFMAG1, 7052 .e_ident[EI_MAG2] = ELFMAG2, 7053 .e_ident[EI_MAG3] = ELFMAG3, 7054 .e_ident[EI_CLASS] = ELF_CLASS, 7055 .e_ident[EI_DATA] = ELF_DATA, 7056 .e_ident[EI_VERSION] = EV_CURRENT, 7057 .e_type = ET_EXEC, 7058 .e_machine = ELF_HOST_MACHINE, 7059 .e_version = EV_CURRENT, 7060 .e_phoff = offsetof(struct ElfImage, phdr), 7061 .e_shoff = offsetof(struct ElfImage, shdr), 7062 .e_ehsize = sizeof(ElfW(Shdr)), 7063 .e_phentsize = sizeof(ElfW(Phdr)), 7064 .e_phnum = 1, 7065 .e_shentsize = sizeof(ElfW(Shdr)), 7066 .e_shnum = ARRAY_SIZE(img->shdr), 7067 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 7068 #ifdef ELF_HOST_FLAGS 7069 .e_flags = ELF_HOST_FLAGS, 7070 #endif 7071 #ifdef ELF_OSABI 7072 .e_ident[EI_OSABI] = ELF_OSABI, 7073 #endif 7074 }, 7075 .phdr = { 7076 .p_type = PT_LOAD, 7077 .p_flags = PF_X, 7078 }, 7079 .shdr = { 7080 [0] = { .sh_type = SHT_NULL }, 7081 /* Trick: The contents of code_gen_buffer are not present in 7082 this fake ELF file; that got allocated elsewhere. Therefore 7083 we mark .text as SHT_NOBITS (similar to .bss) so that readers 7084 will not look for contents. We can record any address. 
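This mirrors how .bss behaves in an ordinary ELF object: the section
header still carries sh_addr and sh_size, which is all the debugger
needs in order to associate the symbol and unwind information with
the generated code.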
*/ 7085 [1] = { /* .text */ 7086 .sh_type = SHT_NOBITS, 7087 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 7088 }, 7089 [2] = { /* .debug_info */ 7090 .sh_type = SHT_PROGBITS, 7091 .sh_offset = offsetof(struct ElfImage, di), 7092 .sh_size = sizeof(struct DebugInfo), 7093 }, 7094 [3] = { /* .debug_abbrev */ 7095 .sh_type = SHT_PROGBITS, 7096 .sh_offset = offsetof(struct ElfImage, da), 7097 .sh_size = sizeof(img->da), 7098 }, 7099 [4] = { /* .debug_frame */ 7100 .sh_type = SHT_PROGBITS, 7101 .sh_offset = sizeof(struct ElfImage), 7102 }, 7103 [5] = { /* .symtab */ 7104 .sh_type = SHT_SYMTAB, 7105 .sh_offset = offsetof(struct ElfImage, sym), 7106 .sh_size = sizeof(img->sym), 7107 .sh_info = 1, 7108 .sh_link = ARRAY_SIZE(img->shdr) - 1, 7109 .sh_entsize = sizeof(ElfW(Sym)), 7110 }, 7111 [6] = { /* .strtab */ 7112 .sh_type = SHT_STRTAB, 7113 .sh_offset = offsetof(struct ElfImage, str), 7114 .sh_size = sizeof(img->str), 7115 } 7116 }, 7117 .sym = { 7118 [1] = { /* code_gen_buffer */ 7119 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 7120 .st_shndx = 1, 7121 } 7122 }, 7123 .di = { 7124 .len = sizeof(struct DebugInfo) - 4, 7125 .version = 2, 7126 .ptr_size = sizeof(void *), 7127 .cu_die = 1, 7128 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 7129 .fn_die = 2, 7130 .fn_name = "code_gen_buffer" 7131 }, 7132 .da = { 7133 1, /* abbrev number (the cu) */ 7134 0x11, 1, /* DW_TAG_compile_unit, has children */ 7135 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 7136 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 7137 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 7138 0, 0, /* end of abbrev */ 7139 2, /* abbrev number (the fn) */ 7140 0x2e, 0, /* DW_TAG_subprogram, no children */ 7141 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 7142 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 7143 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 7144 0, 0, /* end of abbrev */ 7145 0 /* no more abbrev */ 7146 }, 7147 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 7148 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 7149 }; 7150 7151 /* We only need a single jit entry; statically allocate it. */ 7152 static struct jit_code_entry one_entry; 7153 7154 uintptr_t buf = (uintptr_t)buf_ptr; 7155 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 7156 DebugFrameHeader *dfh; 7157 7158 img = g_malloc(img_size); 7159 *img = img_template; 7160 7161 img->phdr.p_vaddr = buf; 7162 img->phdr.p_paddr = buf; 7163 img->phdr.p_memsz = buf_size; 7164 7165 img->shdr[1].sh_name = find_string(img->str, ".text"); 7166 img->shdr[1].sh_addr = buf; 7167 img->shdr[1].sh_size = buf_size; 7168 7169 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 7170 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 7171 7172 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 7173 img->shdr[4].sh_size = debug_frame_size; 7174 7175 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 7176 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 7177 7178 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 7179 img->sym[1].st_value = buf; 7180 img->sym[1].st_size = buf_size; 7181 7182 img->di.cu_low_pc = buf; 7183 img->di.cu_high_pc = buf + buf_size; 7184 img->di.fn_low_pc = buf; 7185 img->di.fn_high_pc = buf + buf_size; 7186 7187 dfh = (DebugFrameHeader *)(img + 1); 7188 memcpy(dfh, debug_frame, debug_frame_size); 7189 dfh->fde.func_start = buf; 7190 dfh->fde.func_len = buf_size; 7191 7192 #ifdef DEBUG_JIT 7193 /* Enable this block to be able to debug the ELF image file creation. 
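The image is written to qemu.jit under g_get_tmp_dir(), typically
/tmp/qemu.jit.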
7194 One can use readelf, objdump, or other inspection utilities. */ 7195 { 7196 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 7197 FILE *f = fopen(jit, "w+b"); 7198 if (f) { 7199 if (fwrite(img, img_size, 1, f) != img_size) { 7200 /* Avoid stupid unused return value warning for fwrite. */ 7201 } 7202 fclose(f); 7203 } 7204 } 7205 #endif 7206 7207 one_entry.symfile_addr = img; 7208 one_entry.symfile_size = img_size; 7209 7210 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 7211 __jit_debug_descriptor.relevant_entry = &one_entry; 7212 __jit_debug_descriptor.first_entry = &one_entry; 7213 __jit_debug_register_code(); 7214 } 7215 #else 7216 /* No support for the feature. Provide the entry point expected by exec.c, 7217 and implement the internal function we declared earlier. */ 7218 7219 static void tcg_register_jit_int(const void *buf, size_t size, 7220 const void *debug_frame, 7221 size_t debug_frame_size) 7222 { 7223 } 7224 7225 void tcg_register_jit(const void *buf, size_t buf_size) 7226 { 7227 } 7228 #endif /* ELF_HOST_MACHINE */ 7229 7230 #if !TCG_TARGET_MAYBE_vec 7231 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 7232 { 7233 g_assert_not_reached(); 7234 } 7235 #endif 7236