/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest);
static void tcg_out_mb(TCGContext *s, unsigned bar);
static void tcg_out_br(TCGContext *s, TCGLabel *l);
static void tcg_out_set_carry(TCGContext *s);
static void tcg_out_set_borrow(TCGContext *s);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
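/*
 * A backend's slow path typically indexes these tables by the size (and,
 * for loads, sign) bits of the memory operation, e.g. something like
 * qemu_ld_helpers[opc & MO_SSIZE] or qemu_st_helpers[opc & MO_SIZE];
 * the exact lookup is up to each tcg-target.c.inc.
 */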
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
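/*
 * The expected life cycle of a label: a branch to a not-yet-bound label
 * records a relocation against it via tcg_out_reloc; binding the label
 * with tcg_out_label fixes its value; once all code has been emitted,
 * tcg_resolve_relocs walks every label's relocation list and lets the
 * backend's patch_reloc rewrite each recorded branch in place.
 */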
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
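/*
 * For example, tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, r)
 * reduces to tcg_out_ext16s(s, TCG_TYPE_I64, d, r), while an MO_UL move
 * between two I32 operands reduces to a plain tcg_out_mov.
 */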
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
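/*
 * Overlap handling in tcg_out_movext2: when i1->dst would clobber
 * i2->src, i2 is emitted first (the two _new_src calls above run in
 * reverse order); when the two moves form a cycle (i1->dst == i2->src
 * and i2->dst == i1->src), either a host xchg untangles the registers
 * or one value is parked in @scratch.
 */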
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */
__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}
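/*
 * The list kept by new_pool_insert is sorted by descending nlong and
 * then by data contents, so that identical entries end up adjacent;
 * tcg_out_pool_finalize below relies on this to emit each distinct
 * constant only once.
 */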
/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
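/*
 * For instance, C_O1_I2(r, r, ri) below pastes into the identifier
 * c_o1_i2_r_r_ri via C_PFX3.  The same C_O?_I? macros are redefined
 * three times: first to build an enumeration of constraint sets, then
 * to build the matching table of stringified constraints, and finally
 * to expand to the enumerator names returned by tcg_target_op_def().
 */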
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
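/*
 * With the definitions above, a line such as C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h produces both the enumerator c_o1_i2_r_r_ri and
 * the table entry { 1, 2, { "r", "r", "ri" } }, in matching order, so
 * the enum indexes the array directly.
 */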
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic: Use @dynamic_constraint to select a constraint set
 *       based on any of @type, @flags, or host isa.
 *   Otherwise: The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpAddSubCarry {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
    void (*out_rii)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, tcg_target_long a2);
} TCGOutOpAddSubCarry;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

typedef struct TCGOutOpLoad {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg base, intptr_t offset);
} TCGOutOpLoad;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;
typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpStore {
    TCGOutOp base;
    void (*out_r)(TCGContext *s, TCGType type, TCGReg data,
                  TCGReg base, intptr_t offset);
    void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data,
                  TCGReg base, intptr_t offset);
} TCGOutOpStore;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif

static const TCGOutOp outop_goto_ptr = {
    .static_constraint = C_O0_I1(r),
};

static const TCGOutOpLoad outop_ld = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tcg_out_ld,
};

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
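/*
 * As a concrete case, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 * compiles only if outop_add really is a TCGOutOpBinary: the _Generic
 * selection has a single association, so any other type fails to match
 * and the build breaks at the point of the mistake.
 */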
/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
    OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
    /* addc1o is implemented with set_carry + addcio */
    OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u),
    OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s),
    OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u),
    OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s),
    OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_st, TCGOutOpStore, outop_st),
    OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8),
    OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
    OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
    /* subb1o is implemented with set_borrow + subbio */
    OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

    [INDEX_op_goto_ptr] = &outop_goto_ptr,

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
    OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u),
    OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s),
    OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx.  See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[].  Note that
 * in system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial
 * context is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both
 * system/user modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
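/*
 * tcg_malloc (in tcg.h) inlines the fast path out of the current chunk;
 * tcg_malloc_internal above handles the slow cases: oversized requests
 * get a dedicated "large" pool that tcg_pool_reset frees outright, while
 * normal chunks are kept and reused across translations.
 */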
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)   /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i32, 3)   /* unsigned oi */
              | dh_typemask(ptr, 4)   /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)   /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i32, 3)   /* unsigned oi */
              | dh_typemask(ptr, 4)   /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0)  /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i32, 3)   /* unsigned oi */
              | dh_typemask(ptr, 4)   /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i32, 3)   /* uint32_t data */
              | dh_typemask(i32, 4)   /* unsigned oi */
              | dh_typemask(ptr, 5)   /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i64, 3)   /* uint64_t data */
              | dh_typemask(i32, 4)   /* unsigned oi */
              | dh_typemask(ptr, 5)   /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)   /* uint64_t addr */
              | dh_typemask(i128, 3)  /* Int128 data */
              | dh_typemask(i32, 4)   /* unsigned oi */
              | dh_typemask(ptr, 5)   /* uintptr_t ra */
};
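/*
 * Each dh_typemask(t, n) above packs the 3-bit typecode for slot n
 * (slot 0 being the return value) into bits [n*3, n*3+2] of .typemask;
 * init_ffi_layout and init_call_layout below decode the mask the same
 * way, three bits at a time.
 */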
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
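/*
 * The nargs computation above: after dropping the return slot with >> 3,
 * the highest in-use argument slot determines the bit length.  E.g. if
 * argument 5 is the last one, its typecode sits in bits 12..14, so
 * 32 - clz32(...) yields between 13 and 15, and DIV_ROUND_UP(..., 3)
 * recovers nargs = 5.
 */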
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;        /* tcg_gen_callN args[] */
    int info_in_idx;    /* TCGHelperInfo in[] */
    int arg_slot;       /* regs+stack slot */
    int ref_slot;       /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
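/*
 * Argument slots are numbered with registers first, then stack words.
 * Assuming, say, six integer argument registers, slots 0..5 satisfy
 * arg_slot_reg_p() and map to tcg_target_call_iarg_regs[], while slot 6
 * is the first stack word, at TCG_TARGET_CALL_STACK_OFFSET.
 */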
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;
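    /*
     * As one worked case: on a 32-bit host whose ABI sets
     * TCG_TARGET_CALL_ARG_I64 to TCG_CALL_ARG_EVEN, an i64 argument
     * first pads cum.arg_slot up to an even index via layout_arg_even,
     * then layout_arg_normal_n(..., 2) assigns two consecutive slots
     * for the low and high halves.
     */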
    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region.  See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
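/*
 * Startup is thus two calls: tcg_init() once from the parent thread,
 * which also registers the target's globals, and tcg_register_thread()
 * from every other TCG thread before it first translates (see the
 * comment above tcg_register_thread).
 */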
1852 */ 1853 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1854 { 1855 uintptr_t align = qemu_icache_linesize; 1856 TranslationBlock *tb; 1857 void *next; 1858 1859 retry: 1860 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1861 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1862 1863 if (unlikely(next > s->code_gen_highwater)) { 1864 if (tcg_region_alloc(s)) { 1865 return NULL; 1866 } 1867 goto retry; 1868 } 1869 qatomic_set(&s->code_gen_ptr, next); 1870 return tb; 1871 } 1872 1873 void tcg_prologue_init(void) 1874 { 1875 TCGContext *s = tcg_ctx; 1876 size_t prologue_size; 1877 1878 s->code_ptr = s->code_gen_ptr; 1879 s->code_buf = s->code_gen_ptr; 1880 s->data_gen_ptr = NULL; 1881 1882 #ifndef CONFIG_TCG_INTERPRETER 1883 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1884 #endif 1885 1886 s->pool_labels = NULL; 1887 1888 qemu_thread_jit_write(); 1889 /* Generate the prologue. */ 1890 tcg_target_qemu_prologue(s); 1891 1892 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1893 { 1894 int result = tcg_out_pool_finalize(s); 1895 tcg_debug_assert(result == 0); 1896 } 1897 1898 prologue_size = tcg_current_code_size(s); 1899 perf_report_prologue(s->code_gen_ptr, prologue_size); 1900 1901 #ifndef CONFIG_TCG_INTERPRETER 1902 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1903 (uintptr_t)s->code_buf, prologue_size); 1904 #endif 1905 1906 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1907 FILE *logfile = qemu_log_trylock(); 1908 if (logfile) { 1909 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1910 if (s->data_gen_ptr) { 1911 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1912 size_t data_size = prologue_size - code_size; 1913 size_t i; 1914 1915 disas(logfile, s->code_gen_ptr, code_size); 1916 1917 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1918 if (sizeof(tcg_target_ulong) == 8) { 1919 fprintf(logfile, 1920 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1921 (uintptr_t)s->data_gen_ptr + i, 1922 *(uint64_t *)(s->data_gen_ptr + i)); 1923 } else { 1924 fprintf(logfile, 1925 "0x%08" PRIxPTR ": .long 0x%08x\n", 1926 (uintptr_t)s->data_gen_ptr + i, 1927 *(uint32_t *)(s->data_gen_ptr + i)); 1928 } 1929 } 1930 } else { 1931 disas(logfile, s->code_gen_ptr, prologue_size); 1932 } 1933 fprintf(logfile, "\n"); 1934 qemu_log_unlock(logfile); 1935 } 1936 } 1937 1938 #ifndef CONFIG_TCG_INTERPRETER 1939 /* 1940 * Assert that goto_ptr is implemented completely, setting an epilogue. 1941 * For tci, we use NULL as the signal to return from the interpreter, 1942 * so skip this check. 1943 */ 1944 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1945 #endif 1946 1947 tcg_region_prologue_set(s); 1948 } 1949 1950 void tcg_func_start(TCGContext *s) 1951 { 1952 tcg_pool_reset(s); 1953 s->nb_temps = s->nb_globals; 1954 1955 /* No temps have been previously allocated for size or locality. */ 1956 tcg_temp_ebb_reset_freed(s); 1957 1958 /* No constant temps have been previously allocated. 
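Emptying the per-type hash tables means tcg_constant_internal() re-interns constants from scratch for this translation; e.g. the first tcg_constant_i32(1) of the TB allocates a fresh TEMP_CONST.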
*/ 1959 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1960 if (s->const_table[i]) { 1961 g_hash_table_remove_all(s->const_table[i]); 1962 } 1963 } 1964 1965 s->nb_ops = 0; 1966 s->nb_labels = 0; 1967 s->current_frame_offset = s->frame_start; 1968 1969 #ifdef CONFIG_DEBUG_TCG 1970 s->goto_tb_issue_mask = 0; 1971 #endif 1972 1973 QTAILQ_INIT(&s->ops); 1974 QTAILQ_INIT(&s->free_ops); 1975 s->emit_before_op = NULL; 1976 QSIMPLEQ_INIT(&s->labels); 1977 1978 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1979 tcg_debug_assert(s->insn_start_words > 0); 1980 } 1981 1982 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1983 { 1984 int n = s->nb_temps++; 1985 1986 if (n >= TCG_MAX_TEMPS) { 1987 tcg_raise_tb_overflow(s); 1988 } 1989 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1990 } 1991 1992 static TCGTemp *tcg_global_alloc(TCGContext *s) 1993 { 1994 TCGTemp *ts; 1995 1996 tcg_debug_assert(s->nb_globals == s->nb_temps); 1997 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1998 s->nb_globals++; 1999 ts = tcg_temp_alloc(s); 2000 ts->kind = TEMP_GLOBAL; 2001 2002 return ts; 2003 } 2004 2005 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 2006 TCGReg reg, const char *name) 2007 { 2008 TCGTemp *ts; 2009 2010 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 2011 2012 ts = tcg_global_alloc(s); 2013 ts->base_type = type; 2014 ts->type = type; 2015 ts->kind = TEMP_FIXED; 2016 ts->reg = reg; 2017 ts->name = name; 2018 tcg_regset_set_reg(s->reserved_regs, reg); 2019 2020 return ts; 2021 } 2022 2023 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 2024 { 2025 s->frame_start = start; 2026 s->frame_end = start + size; 2027 s->frame_temp 2028 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 2029 } 2030 2031 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 2032 const char *name, TCGType type) 2033 { 2034 TCGContext *s = tcg_ctx; 2035 TCGTemp *base_ts = tcgv_ptr_temp(base); 2036 TCGTemp *ts = tcg_global_alloc(s); 2037 int indirect_reg = 0; 2038 2039 switch (base_ts->kind) { 2040 case TEMP_FIXED: 2041 break; 2042 case TEMP_GLOBAL: 2043 /* We do not support double-indirect registers. */ 2044 tcg_debug_assert(!base_ts->indirect_reg); 2045 base_ts->indirect_base = 1; 2046 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 2047 ? 
2 : 1); 2048 indirect_reg = 1; 2049 break; 2050 default: 2051 g_assert_not_reached(); 2052 } 2053 2054 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2055 TCGTemp *ts2 = tcg_global_alloc(s); 2056 char buf[64]; 2057 2058 ts->base_type = TCG_TYPE_I64; 2059 ts->type = TCG_TYPE_I32; 2060 ts->indirect_reg = indirect_reg; 2061 ts->mem_allocated = 1; 2062 ts->mem_base = base_ts; 2063 ts->mem_offset = offset; 2064 pstrcpy(buf, sizeof(buf), name); 2065 pstrcat(buf, sizeof(buf), "_0"); 2066 ts->name = strdup(buf); 2067 2068 tcg_debug_assert(ts2 == ts + 1); 2069 ts2->base_type = TCG_TYPE_I64; 2070 ts2->type = TCG_TYPE_I32; 2071 ts2->indirect_reg = indirect_reg; 2072 ts2->mem_allocated = 1; 2073 ts2->mem_base = base_ts; 2074 ts2->mem_offset = offset + 4; 2075 ts2->temp_subindex = 1; 2076 pstrcpy(buf, sizeof(buf), name); 2077 pstrcat(buf, sizeof(buf), "_1"); 2078 ts2->name = strdup(buf); 2079 } else { 2080 ts->base_type = type; 2081 ts->type = type; 2082 ts->indirect_reg = indirect_reg; 2083 ts->mem_allocated = 1; 2084 ts->mem_base = base_ts; 2085 ts->mem_offset = offset; 2086 ts->name = name; 2087 } 2088 return ts; 2089 } 2090 2091 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 2092 { 2093 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 2094 return temp_tcgv_i32(ts); 2095 } 2096 2097 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 2098 { 2099 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 2100 return temp_tcgv_i64(ts); 2101 } 2102 2103 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 2104 { 2105 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 2106 return temp_tcgv_ptr(ts); 2107 } 2108 2109 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 2110 { 2111 TCGContext *s = tcg_ctx; 2112 TCGTemp *ts; 2113 int n; 2114 2115 if (kind == TEMP_EBB) { 2116 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 2117 2118 if (idx < TCG_MAX_TEMPS) { 2119 /* There is already an available temp with the right type. 
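It was parked in free_temps by tcg_temp_free_internal(), so reusing it avoids growing nb_temps. A typical EBB-temp lifetime (illustrative sketch): TCGv_i64 t = tcg_temp_ebb_new_i64(); ...emit ops that use t...; tcg_temp_free_i64(t); -- after which the slot returns to free_temps.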
*/ 2120 clear_bit(idx, s->free_temps[type].l); 2121 2122 ts = &s->temps[idx]; 2123 ts->temp_allocated = 1; 2124 tcg_debug_assert(ts->base_type == type); 2125 tcg_debug_assert(ts->kind == kind); 2126 return ts; 2127 } 2128 } else { 2129 tcg_debug_assert(kind == TEMP_TB); 2130 } 2131 2132 switch (type) { 2133 case TCG_TYPE_I32: 2134 case TCG_TYPE_V64: 2135 case TCG_TYPE_V128: 2136 case TCG_TYPE_V256: 2137 n = 1; 2138 break; 2139 case TCG_TYPE_I64: 2140 n = 64 / TCG_TARGET_REG_BITS; 2141 break; 2142 case TCG_TYPE_I128: 2143 n = 128 / TCG_TARGET_REG_BITS; 2144 break; 2145 default: 2146 g_assert_not_reached(); 2147 } 2148 2149 ts = tcg_temp_alloc(s); 2150 ts->base_type = type; 2151 ts->temp_allocated = 1; 2152 ts->kind = kind; 2153 2154 if (n == 1) { 2155 ts->type = type; 2156 } else { 2157 ts->type = TCG_TYPE_REG; 2158 2159 for (int i = 1; i < n; ++i) { 2160 TCGTemp *ts2 = tcg_temp_alloc(s); 2161 2162 tcg_debug_assert(ts2 == ts + i); 2163 ts2->base_type = type; 2164 ts2->type = TCG_TYPE_REG; 2165 ts2->temp_allocated = 1; 2166 ts2->temp_subindex = i; 2167 ts2->kind = kind; 2168 } 2169 } 2170 return ts; 2171 } 2172 2173 TCGv_i32 tcg_temp_new_i32(void) 2174 { 2175 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 2176 } 2177 2178 TCGv_i32 tcg_temp_ebb_new_i32(void) 2179 { 2180 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 2181 } 2182 2183 TCGv_i64 tcg_temp_new_i64(void) 2184 { 2185 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 2186 } 2187 2188 TCGv_i64 tcg_temp_ebb_new_i64(void) 2189 { 2190 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 2191 } 2192 2193 TCGv_ptr tcg_temp_new_ptr(void) 2194 { 2195 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2196 } 2197 2198 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2199 { 2200 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2201 } 2202 2203 TCGv_i128 tcg_temp_new_i128(void) 2204 { 2205 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2206 } 2207 2208 TCGv_i128 tcg_temp_ebb_new_i128(void) 2209 { 2210 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2211 } 2212 2213 TCGv_vec tcg_temp_new_vec(TCGType type) 2214 { 2215 TCGTemp *t; 2216 2217 #ifdef CONFIG_DEBUG_TCG 2218 switch (type) { 2219 case TCG_TYPE_V64: 2220 assert(TCG_TARGET_HAS_v64); 2221 break; 2222 case TCG_TYPE_V128: 2223 assert(TCG_TARGET_HAS_v128); 2224 break; 2225 case TCG_TYPE_V256: 2226 assert(TCG_TARGET_HAS_v256); 2227 break; 2228 default: 2229 g_assert_not_reached(); 2230 } 2231 #endif 2232 2233 t = tcg_temp_new_internal(type, TEMP_EBB); 2234 return temp_tcgv_vec(t); 2235 } 2236 2237 /* Create a new temp of the same type as an existing temp. */ 2238 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2239 { 2240 TCGTemp *t = tcgv_vec_temp(match); 2241 2242 tcg_debug_assert(t->temp_allocated != 0); 2243 2244 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2245 return temp_tcgv_vec(t); 2246 } 2247 2248 void tcg_temp_free_internal(TCGTemp *ts) 2249 { 2250 TCGContext *s = tcg_ctx; 2251 2252 switch (ts->kind) { 2253 case TEMP_CONST: 2254 case TEMP_TB: 2255 /* Silently ignore free. */ 2256 break; 2257 case TEMP_EBB: 2258 tcg_debug_assert(ts->temp_allocated != 0); 2259 ts->temp_allocated = 0; 2260 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2261 break; 2262 default: 2263 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
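Both kinds live for the entire translation, so a free request here indicates a bug in the caller.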
*/ 2264 g_assert_not_reached(); 2265 } 2266 } 2267 2268 void tcg_temp_free_i32(TCGv_i32 arg) 2269 { 2270 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2271 } 2272 2273 void tcg_temp_free_i64(TCGv_i64 arg) 2274 { 2275 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2276 } 2277 2278 void tcg_temp_free_i128(TCGv_i128 arg) 2279 { 2280 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2281 } 2282 2283 void tcg_temp_free_ptr(TCGv_ptr arg) 2284 { 2285 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2286 } 2287 2288 void tcg_temp_free_vec(TCGv_vec arg) 2289 { 2290 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2291 } 2292 2293 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2294 { 2295 TCGContext *s = tcg_ctx; 2296 GHashTable *h = s->const_table[type]; 2297 TCGTemp *ts; 2298 2299 if (h == NULL) { 2300 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2301 s->const_table[type] = h; 2302 } 2303 2304 ts = g_hash_table_lookup(h, &val); 2305 if (ts == NULL) { 2306 int64_t *val_ptr; 2307 2308 ts = tcg_temp_alloc(s); 2309 2310 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2311 TCGTemp *ts2 = tcg_temp_alloc(s); 2312 2313 tcg_debug_assert(ts2 == ts + 1); 2314 2315 ts->base_type = TCG_TYPE_I64; 2316 ts->type = TCG_TYPE_I32; 2317 ts->kind = TEMP_CONST; 2318 ts->temp_allocated = 1; 2319 2320 ts2->base_type = TCG_TYPE_I64; 2321 ts2->type = TCG_TYPE_I32; 2322 ts2->kind = TEMP_CONST; 2323 ts2->temp_allocated = 1; 2324 ts2->temp_subindex = 1; 2325 2326 /* 2327 * Retain the full value of the 64-bit constant in the low 2328 * part, so that the hash table works. Actual uses will 2329 * truncate the value to the low part. 2330 */ 2331 ts[HOST_BIG_ENDIAN].val = val; 2332 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2333 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2334 } else { 2335 ts->base_type = type; 2336 ts->type = type; 2337 ts->kind = TEMP_CONST; 2338 ts->temp_allocated = 1; 2339 ts->val = val; 2340 val_ptr = &ts->val; 2341 } 2342 g_hash_table_insert(h, val_ptr, ts); 2343 } 2344 2345 return ts; 2346 } 2347 2348 TCGv_i32 tcg_constant_i32(int32_t val) 2349 { 2350 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2351 } 2352 2353 TCGv_i64 tcg_constant_i64(int64_t val) 2354 { 2355 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2356 } 2357 2358 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2359 { 2360 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2361 } 2362 2363 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2364 { 2365 val = dup_const(vece, val); 2366 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2367 } 2368 2369 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2370 { 2371 TCGTemp *t = tcgv_vec_temp(match); 2372 2373 tcg_debug_assert(t->temp_allocated != 0); 2374 return tcg_constant_vec(t->base_type, vece, val); 2375 } 2376 2377 #ifdef CONFIG_DEBUG_TCG 2378 size_t temp_idx(TCGTemp *ts) 2379 { 2380 ptrdiff_t n = ts - tcg_ctx->temps; 2381 assert(n >= 0 && n < tcg_ctx->nb_temps); 2382 return n; 2383 } 2384 2385 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2386 { 2387 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2388 2389 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2390 assert(o % sizeof(TCGTemp) == 0); 2391 2392 return (void *)tcg_ctx + (uintptr_t)v; 2393 } 2394 #endif /* CONFIG_DEBUG_TCG */ 2395 2396 /* 2397 * Return true if OP may appear in the opcode stream with TYPE. 2398 * Test the runtime variable that controls each opcode. 
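Typical use (illustrative sketch): if (tcg_op_supported(INDEX_op_mul_vec, TCG_TYPE_V128, 0)) { ...emit mul_vec directly... } else { ...fall back to a simpler expansion... }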
2399 */ 2400 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2401 { 2402 bool has_type; 2403 2404 switch (type) { 2405 case TCG_TYPE_I32: 2406 has_type = true; 2407 break; 2408 case TCG_TYPE_I64: 2409 has_type = TCG_TARGET_REG_BITS == 64; 2410 break; 2411 case TCG_TYPE_V64: 2412 has_type = TCG_TARGET_HAS_v64; 2413 break; 2414 case TCG_TYPE_V128: 2415 has_type = TCG_TARGET_HAS_v128; 2416 break; 2417 case TCG_TYPE_V256: 2418 has_type = TCG_TARGET_HAS_v256; 2419 break; 2420 default: 2421 has_type = false; 2422 break; 2423 } 2424 2425 switch (op) { 2426 case INDEX_op_discard: 2427 case INDEX_op_set_label: 2428 case INDEX_op_call: 2429 case INDEX_op_br: 2430 case INDEX_op_mb: 2431 case INDEX_op_insn_start: 2432 case INDEX_op_exit_tb: 2433 case INDEX_op_goto_tb: 2434 case INDEX_op_goto_ptr: 2435 case INDEX_op_qemu_ld_i32: 2436 case INDEX_op_qemu_st_i32: 2437 case INDEX_op_qemu_ld_i64: 2438 case INDEX_op_qemu_st_i64: 2439 return true; 2440 2441 case INDEX_op_qemu_ld_i128: 2442 case INDEX_op_qemu_st_i128: 2443 return TCG_TARGET_HAS_qemu_ldst_i128; 2444 2445 case INDEX_op_add: 2446 case INDEX_op_and: 2447 case INDEX_op_brcond: 2448 case INDEX_op_deposit: 2449 case INDEX_op_extract: 2450 case INDEX_op_ld8u: 2451 case INDEX_op_ld8s: 2452 case INDEX_op_ld16u: 2453 case INDEX_op_ld16s: 2454 case INDEX_op_ld: 2455 case INDEX_op_mov: 2456 case INDEX_op_movcond: 2457 case INDEX_op_negsetcond: 2458 case INDEX_op_or: 2459 case INDEX_op_setcond: 2460 case INDEX_op_sextract: 2461 case INDEX_op_st8: 2462 case INDEX_op_st16: 2463 case INDEX_op_st: 2464 case INDEX_op_xor: 2465 return has_type; 2466 2467 case INDEX_op_brcond2_i32: 2468 case INDEX_op_setcond2_i32: 2469 return TCG_TARGET_REG_BITS == 32; 2470 2471 case INDEX_op_ld32u: 2472 case INDEX_op_ld32s: 2473 case INDEX_op_st32: 2474 case INDEX_op_ext_i32_i64: 2475 case INDEX_op_extu_i32_i64: 2476 case INDEX_op_extrl_i64_i32: 2477 case INDEX_op_extrh_i64_i32: 2478 return TCG_TARGET_REG_BITS == 64; 2479 2480 case INDEX_op_mov_vec: 2481 case INDEX_op_dup_vec: 2482 case INDEX_op_dupm_vec: 2483 case INDEX_op_ld_vec: 2484 case INDEX_op_st_vec: 2485 case INDEX_op_add_vec: 2486 case INDEX_op_sub_vec: 2487 case INDEX_op_and_vec: 2488 case INDEX_op_or_vec: 2489 case INDEX_op_xor_vec: 2490 case INDEX_op_cmp_vec: 2491 return has_type; 2492 case INDEX_op_dup2_vec: 2493 return has_type && TCG_TARGET_REG_BITS == 32; 2494 case INDEX_op_not_vec: 2495 return has_type && TCG_TARGET_HAS_not_vec; 2496 case INDEX_op_neg_vec: 2497 return has_type && TCG_TARGET_HAS_neg_vec; 2498 case INDEX_op_abs_vec: 2499 return has_type && TCG_TARGET_HAS_abs_vec; 2500 case INDEX_op_andc_vec: 2501 return has_type && TCG_TARGET_HAS_andc_vec; 2502 case INDEX_op_orc_vec: 2503 return has_type && TCG_TARGET_HAS_orc_vec; 2504 case INDEX_op_nand_vec: 2505 return has_type && TCG_TARGET_HAS_nand_vec; 2506 case INDEX_op_nor_vec: 2507 return has_type && TCG_TARGET_HAS_nor_vec; 2508 case INDEX_op_eqv_vec: 2509 return has_type && TCG_TARGET_HAS_eqv_vec; 2510 case INDEX_op_mul_vec: 2511 return has_type && TCG_TARGET_HAS_mul_vec; 2512 case INDEX_op_shli_vec: 2513 case INDEX_op_shri_vec: 2514 case INDEX_op_sari_vec: 2515 return has_type && TCG_TARGET_HAS_shi_vec; 2516 case INDEX_op_shls_vec: 2517 case INDEX_op_shrs_vec: 2518 case INDEX_op_sars_vec: 2519 return has_type && TCG_TARGET_HAS_shs_vec; 2520 case INDEX_op_shlv_vec: 2521 case INDEX_op_shrv_vec: 2522 case INDEX_op_sarv_vec: 2523 return has_type && TCG_TARGET_HAS_shv_vec; 2524 case INDEX_op_rotli_vec: 2525 return has_type && 
TCG_TARGET_HAS_roti_vec; 2526 case INDEX_op_rotls_vec: 2527 return has_type && TCG_TARGET_HAS_rots_vec; 2528 case INDEX_op_rotlv_vec: 2529 case INDEX_op_rotrv_vec: 2530 return has_type && TCG_TARGET_HAS_rotv_vec; 2531 case INDEX_op_ssadd_vec: 2532 case INDEX_op_usadd_vec: 2533 case INDEX_op_sssub_vec: 2534 case INDEX_op_ussub_vec: 2535 return has_type && TCG_TARGET_HAS_sat_vec; 2536 case INDEX_op_smin_vec: 2537 case INDEX_op_umin_vec: 2538 case INDEX_op_smax_vec: 2539 case INDEX_op_umax_vec: 2540 return has_type && TCG_TARGET_HAS_minmax_vec; 2541 case INDEX_op_bitsel_vec: 2542 return has_type && TCG_TARGET_HAS_bitsel_vec; 2543 case INDEX_op_cmpsel_vec: 2544 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2545 2546 default: 2547 if (op < INDEX_op_last_generic) { 2548 const TCGOutOp *outop; 2549 TCGConstraintSetIndex con_set; 2550 2551 if (!has_type) { 2552 return false; 2553 } 2554 2555 outop = all_outop[op]; 2556 tcg_debug_assert(outop != NULL); 2557 2558 con_set = outop->static_constraint; 2559 if (con_set == C_Dynamic) { 2560 con_set = outop->dynamic_constraint(type, flags); 2561 } 2562 if (con_set >= 0) { 2563 return true; 2564 } 2565 tcg_debug_assert(con_set == C_NotImplemented); 2566 return false; 2567 } 2568 tcg_debug_assert(op < NB_OPS); 2569 return true; 2570 2571 case INDEX_op_last_generic: 2572 g_assert_not_reached(); 2573 } 2574 } 2575 2576 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2577 { 2578 unsigned width; 2579 2580 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2581 width = (type == TCG_TYPE_I32 ? 32 : 64); 2582 2583 tcg_debug_assert(ofs < width); 2584 tcg_debug_assert(len > 0); 2585 tcg_debug_assert(len <= width - ofs); 2586 2587 return TCG_TARGET_deposit_valid(type, ofs, len); 2588 } 2589 2590 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2591 2592 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2593 TCGTemp *ret, TCGTemp **args) 2594 { 2595 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2596 int n_extend = 0; 2597 TCGOp *op; 2598 int i, n, pi = 0, total_args; 2599 2600 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2601 init_call_layout(info); 2602 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2603 } 2604 2605 total_args = info->nr_out + info->nr_in + 2; 2606 op = tcg_op_alloc(INDEX_op_call, total_args); 2607 2608 #ifdef CONFIG_PLUGIN 2609 /* Flag helpers that may affect guest state */ 2610 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2611 tcg_ctx->plugin_insn->calls_helpers = true; 2612 } 2613 #endif 2614 2615 TCGOP_CALLO(op) = n = info->nr_out; 2616 switch (n) { 2617 case 0: 2618 tcg_debug_assert(ret == NULL); 2619 break; 2620 case 1: 2621 tcg_debug_assert(ret != NULL); 2622 op->args[pi++] = temp_arg(ret); 2623 break; 2624 case 2: 2625 case 4: 2626 tcg_debug_assert(ret != NULL); 2627 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2628 tcg_debug_assert(ret->temp_subindex == 0); 2629 for (i = 0; i < n; ++i) { 2630 op->args[pi++] = temp_arg(ret + i); 2631 } 2632 break; 2633 default: 2634 g_assert_not_reached(); 2635 } 2636 2637 TCGOP_CALLI(op) = n = info->nr_in; 2638 for (i = 0; i < n; i++) { 2639 const TCGCallArgumentLoc *loc = &info->in[i]; 2640 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2641 2642 switch (loc->kind) { 2643 case TCG_CALL_ARG_NORMAL: 2644 case TCG_CALL_ARG_BY_REF: 2645 case TCG_CALL_ARG_BY_REF_N: 2646 op->args[pi++] = temp_arg(ts); 2647 break; 2648 2649 case TCG_CALL_ARG_EXTEND_U: 2650 case TCG_CALL_ARG_EXTEND_S: 
2651 { 2652 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2653 TCGv_i32 orig = temp_tcgv_i32(ts); 2654 2655 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2656 tcg_gen_ext_i32_i64(temp, orig); 2657 } else { 2658 tcg_gen_extu_i32_i64(temp, orig); 2659 } 2660 op->args[pi++] = tcgv_i64_arg(temp); 2661 extend_free[n_extend++] = temp; 2662 } 2663 break; 2664 2665 default: 2666 g_assert_not_reached(); 2667 } 2668 } 2669 op->args[pi++] = (uintptr_t)func; 2670 op->args[pi++] = (uintptr_t)info; 2671 tcg_debug_assert(pi == total_args); 2672 2673 if (tcg_ctx->emit_before_op) { 2674 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2675 } else { 2676 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2677 } 2678 2679 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2680 for (i = 0; i < n_extend; ++i) { 2681 tcg_temp_free_i64(extend_free[i]); 2682 } 2683 } 2684 2685 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2686 { 2687 tcg_gen_callN(func, info, ret, NULL); 2688 } 2689 2690 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2691 { 2692 tcg_gen_callN(func, info, ret, &t1); 2693 } 2694 2695 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2696 TCGTemp *t1, TCGTemp *t2) 2697 { 2698 TCGTemp *args[2] = { t1, t2 }; 2699 tcg_gen_callN(func, info, ret, args); 2700 } 2701 2702 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2703 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2704 { 2705 TCGTemp *args[3] = { t1, t2, t3 }; 2706 tcg_gen_callN(func, info, ret, args); 2707 } 2708 2709 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, 2710 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2711 { 2712 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2713 tcg_gen_callN(func, info, ret, args); 2714 } 2715 2716 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2717 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2718 { 2719 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2720 tcg_gen_callN(func, info, ret, args); 2721 } 2722 2723 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2724 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2725 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2726 { 2727 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2728 tcg_gen_callN(func, info, ret, args); 2729 } 2730 2731 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2732 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2733 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2734 { 2735 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2736 tcg_gen_callN(func, info, ret, args); 2737 } 2738 2739 static void tcg_reg_alloc_start(TCGContext *s) 2740 { 2741 int i, n; 2742 2743 for (i = 0, n = s->nb_temps; i < n; i++) { 2744 TCGTemp *ts = &s->temps[i]; 2745 TCGTempVal val = TEMP_VAL_MEM; 2746 2747 switch (ts->kind) { 2748 case TEMP_CONST: 2749 val = TEMP_VAL_CONST; 2750 break; 2751 case TEMP_FIXED: 2752 val = TEMP_VAL_REG; 2753 break; 2754 case TEMP_GLOBAL: 2755 break; 2756 case TEMP_EBB: 2757 val = TEMP_VAL_DEAD; 2758 /* fall through */ 2759 case TEMP_TB: 2760 ts->mem_allocated = 0; 2761 break; 2762 default: 2763 g_assert_not_reached(); 2764 } 2765 ts->val_type = val; 2766 } 2767 2768 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2769 } 2770 2771 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2772 TCGTemp *ts) 2773 { 2774 int idx = temp_idx(ts); 2775 2776 switch (ts->kind) { 2777 case TEMP_FIXED: 2778 case TEMP_GLOBAL: 2779 pstrcpy(buf, buf_size, ts->name); 2780 break; 2781 case TEMP_TB: 2782 
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2783 break; 2784 case TEMP_EBB: 2785 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2786 break; 2787 case TEMP_CONST: 2788 switch (ts->type) { 2789 case TCG_TYPE_I32: 2790 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2791 break; 2792 #if TCG_TARGET_REG_BITS > 32 2793 case TCG_TYPE_I64: 2794 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2795 break; 2796 #endif 2797 case TCG_TYPE_V64: 2798 case TCG_TYPE_V128: 2799 case TCG_TYPE_V256: 2800 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2801 64 << (ts->type - TCG_TYPE_V64), ts->val); 2802 break; 2803 default: 2804 g_assert_not_reached(); 2805 } 2806 break; 2807 } 2808 return buf; 2809 } 2810 2811 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2812 int buf_size, TCGArg arg) 2813 { 2814 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2815 } 2816 2817 static const char * const cond_name[] = 2818 { 2819 [TCG_COND_NEVER] = "never", 2820 [TCG_COND_ALWAYS] = "always", 2821 [TCG_COND_EQ] = "eq", 2822 [TCG_COND_NE] = "ne", 2823 [TCG_COND_LT] = "lt", 2824 [TCG_COND_GE] = "ge", 2825 [TCG_COND_LE] = "le", 2826 [TCG_COND_GT] = "gt", 2827 [TCG_COND_LTU] = "ltu", 2828 [TCG_COND_GEU] = "geu", 2829 [TCG_COND_LEU] = "leu", 2830 [TCG_COND_GTU] = "gtu", 2831 [TCG_COND_TSTEQ] = "tsteq", 2832 [TCG_COND_TSTNE] = "tstne", 2833 }; 2834 2835 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2836 { 2837 [MO_UB] = "ub", 2838 [MO_SB] = "sb", 2839 [MO_LEUW] = "leuw", 2840 [MO_LESW] = "lesw", 2841 [MO_LEUL] = "leul", 2842 [MO_LESL] = "lesl", 2843 [MO_LEUQ] = "leq", 2844 [MO_BEUW] = "beuw", 2845 [MO_BESW] = "besw", 2846 [MO_BEUL] = "beul", 2847 [MO_BESL] = "besl", 2848 [MO_BEUQ] = "beq", 2849 [MO_128 + MO_BE] = "beo", 2850 [MO_128 + MO_LE] = "leo", 2851 }; 2852 2853 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2854 [MO_UNALN >> MO_ASHIFT] = "un+", 2855 [MO_ALIGN >> MO_ASHIFT] = "al+", 2856 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2857 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2858 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2859 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2860 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2861 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2862 }; 2863 2864 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2865 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2866 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2867 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2868 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2869 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2870 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2871 }; 2872 2873 static const char bswap_flag_name[][6] = { 2874 [TCG_BSWAP_IZ] = "iz", 2875 [TCG_BSWAP_OZ] = "oz", 2876 [TCG_BSWAP_OS] = "os", 2877 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2878 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2879 }; 2880 2881 #ifdef CONFIG_PLUGIN 2882 static const char * const plugin_from_name[] = { 2883 "from-tb", 2884 "from-insn", 2885 "after-insn", 2886 "after-tb", 2887 }; 2888 #endif 2889 2890 static inline bool tcg_regset_single(TCGRegSet d) 2891 { 2892 return (d & (d - 1)) == 0; 2893 } 2894 2895 static inline TCGReg tcg_regset_first(TCGRegSet d) 2896 { 2897 if (TCG_TARGET_NB_REGS <= 32) { 2898 return ctz32(d); 2899 } else { 2900 return ctz64(d); 2901 } 2902 } 2903 2904 /* Return only the number of characters output -- no error return. */ 2905 #define ne_fprintf(...) \ 2906 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? 
ret_ : 0; }) 2907 2908 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2909 { 2910 char buf[128]; 2911 TCGOp *op; 2912 2913 QTAILQ_FOREACH(op, &s->ops, link) { 2914 int i, k, nb_oargs, nb_iargs, nb_cargs; 2915 const TCGOpDef *def; 2916 TCGOpcode c; 2917 int col = 0; 2918 2919 c = op->opc; 2920 def = &tcg_op_defs[c]; 2921 2922 if (c == INDEX_op_insn_start) { 2923 nb_oargs = 0; 2924 col += ne_fprintf(f, "\n ----"); 2925 2926 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2927 col += ne_fprintf(f, " %016" PRIx64, 2928 tcg_get_insn_start_param(op, i)); 2929 } 2930 } else if (c == INDEX_op_call) { 2931 const TCGHelperInfo *info = tcg_call_info(op); 2932 void *func = tcg_call_func(op); 2933 2934 /* variable number of arguments */ 2935 nb_oargs = TCGOP_CALLO(op); 2936 nb_iargs = TCGOP_CALLI(op); 2937 nb_cargs = def->nb_cargs; 2938 2939 col += ne_fprintf(f, " %s ", def->name); 2940 2941 /* 2942 * Print the function name from TCGHelperInfo, if available. 2943 * Note that plugins have a template function for the info, 2944 * but the actual function pointer comes from the plugin. 2945 */ 2946 if (func == info->func) { 2947 col += ne_fprintf(f, "%s", info->name); 2948 } else { 2949 col += ne_fprintf(f, "plugin(%p)", func); 2950 } 2951 2952 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2953 for (i = 0; i < nb_oargs; i++) { 2954 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2955 op->args[i])); 2956 } 2957 for (i = 0; i < nb_iargs; i++) { 2958 TCGArg arg = op->args[nb_oargs + i]; 2959 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2960 col += ne_fprintf(f, ",%s", t); 2961 } 2962 } else { 2963 if (def->flags & TCG_OPF_INT) { 2964 col += ne_fprintf(f, " %s_i%d ", 2965 def->name, 2966 8 * tcg_type_size(TCGOP_TYPE(op))); 2967 } else if (def->flags & TCG_OPF_VECTOR) { 2968 col += ne_fprintf(f, "%s v%d,e%d,", 2969 def->name, 2970 8 * tcg_type_size(TCGOP_TYPE(op)), 2971 8 << TCGOP_VECE(op)); 2972 } else { 2973 col += ne_fprintf(f, " %s ", def->name); 2974 } 2975 2976 nb_oargs = def->nb_oargs; 2977 nb_iargs = def->nb_iargs; 2978 nb_cargs = def->nb_cargs; 2979 2980 k = 0; 2981 for (i = 0; i < nb_oargs; i++) { 2982 const char *sep = k ? "," : ""; 2983 col += ne_fprintf(f, "%s%s", sep, 2984 tcg_get_arg_str(s, buf, sizeof(buf), 2985 op->args[k++])); 2986 } 2987 for (i = 0; i < nb_iargs; i++) { 2988 const char *sep = k ? 
"," : ""; 2989 col += ne_fprintf(f, "%s%s", sep, 2990 tcg_get_arg_str(s, buf, sizeof(buf), 2991 op->args[k++])); 2992 } 2993 switch (c) { 2994 case INDEX_op_brcond: 2995 case INDEX_op_setcond: 2996 case INDEX_op_negsetcond: 2997 case INDEX_op_movcond: 2998 case INDEX_op_brcond2_i32: 2999 case INDEX_op_setcond2_i32: 3000 case INDEX_op_cmp_vec: 3001 case INDEX_op_cmpsel_vec: 3002 if (op->args[k] < ARRAY_SIZE(cond_name) 3003 && cond_name[op->args[k]]) { 3004 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 3005 } else { 3006 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 3007 } 3008 i = 1; 3009 break; 3010 case INDEX_op_qemu_ld_i32: 3011 case INDEX_op_qemu_st_i32: 3012 case INDEX_op_qemu_ld_i64: 3013 case INDEX_op_qemu_st_i64: 3014 case INDEX_op_qemu_ld_i128: 3015 case INDEX_op_qemu_st_i128: 3016 { 3017 const char *s_al, *s_op, *s_at; 3018 MemOpIdx oi = op->args[k++]; 3019 MemOp mop = get_memop(oi); 3020 unsigned ix = get_mmuidx(oi); 3021 3022 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 3023 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 3024 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 3025 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 3026 3027 /* If all fields are accounted for, print symbolically. */ 3028 if (!mop && s_al && s_op && s_at) { 3029 col += ne_fprintf(f, ",%s%s%s,%u", 3030 s_at, s_al, s_op, ix); 3031 } else { 3032 mop = get_memop(oi); 3033 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 3034 } 3035 i = 1; 3036 } 3037 break; 3038 case INDEX_op_bswap16: 3039 case INDEX_op_bswap32: 3040 case INDEX_op_bswap64: 3041 { 3042 TCGArg flags = op->args[k]; 3043 const char *name = NULL; 3044 3045 if (flags < ARRAY_SIZE(bswap_flag_name)) { 3046 name = bswap_flag_name[flags]; 3047 } 3048 if (name) { 3049 col += ne_fprintf(f, ",%s", name); 3050 } else { 3051 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 3052 } 3053 i = k = 1; 3054 } 3055 break; 3056 #ifdef CONFIG_PLUGIN 3057 case INDEX_op_plugin_cb: 3058 { 3059 TCGArg from = op->args[k++]; 3060 const char *name = NULL; 3061 3062 if (from < ARRAY_SIZE(plugin_from_name)) { 3063 name = plugin_from_name[from]; 3064 } 3065 if (name) { 3066 col += ne_fprintf(f, "%s", name); 3067 } else { 3068 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 3069 } 3070 i = 1; 3071 } 3072 break; 3073 #endif 3074 default: 3075 i = 0; 3076 break; 3077 } 3078 switch (c) { 3079 case INDEX_op_set_label: 3080 case INDEX_op_br: 3081 case INDEX_op_brcond: 3082 case INDEX_op_brcond2_i32: 3083 col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", 3084 arg_label(op->args[k])->id); 3085 i++, k++; 3086 break; 3087 case INDEX_op_mb: 3088 { 3089 TCGBar membar = op->args[k]; 3090 const char *b_op, *m_op; 3091 3092 switch (membar & TCG_BAR_SC) { 3093 case 0: 3094 b_op = "none"; 3095 break; 3096 case TCG_BAR_LDAQ: 3097 b_op = "acq"; 3098 break; 3099 case TCG_BAR_STRL: 3100 b_op = "rel"; 3101 break; 3102 case TCG_BAR_SC: 3103 b_op = "seq"; 3104 break; 3105 default: 3106 g_assert_not_reached(); 3107 } 3108 3109 switch (membar & TCG_MO_ALL) { 3110 case 0: 3111 m_op = "none"; 3112 break; 3113 case TCG_MO_LD_LD: 3114 m_op = "rr"; 3115 break; 3116 case TCG_MO_LD_ST: 3117 m_op = "rw"; 3118 break; 3119 case TCG_MO_ST_LD: 3120 m_op = "wr"; 3121 break; 3122 case TCG_MO_ST_ST: 3123 m_op = "ww"; 3124 break; 3125 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3126 m_op = "rr+rw"; 3127 break; 3128 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3129 m_op = "rr+wr"; 3130 break; 3131 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3132 m_op = "rr+ww"; 3133 break; 3134 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3135 m_op = "rw+wr"; 3136 break; 3137 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3138 m_op = "rw+ww"; 3139 break; 3140 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3141 m_op = "wr+ww"; 3142 break; 3143 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3144 m_op = "rr+rw+wr"; 3145 break; 3146 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3147 m_op = "rr+rw+ww"; 3148 break; 3149 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3150 m_op = "rr+wr+ww"; 3151 break; 3152 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3153 m_op = "rw+wr+ww"; 3154 break; 3155 case TCG_MO_ALL: 3156 m_op = "all"; 3157 break; 3158 default: 3159 g_assert_not_reached(); 3160 } 3161 3162 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3163 i++, k++; 3164 } 3165 break; 3166 default: 3167 break; 3168 } 3169 for (; i < nb_cargs; i++, k++) { 3170 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3171 op->args[k]); 3172 } 3173 } 3174 3175 if (have_prefs || op->life) { 3176 for (; col < 40; ++col) { 3177 putc(' ', f); 3178 } 3179 } 3180 3181 if (op->life) { 3182 unsigned life = op->life; 3183 3184 if (life & (SYNC_ARG * 3)) { 3185 ne_fprintf(f, " sync:"); 3186 for (i = 0; i < 2; ++i) { 3187 if (life & (SYNC_ARG << i)) { 3188 ne_fprintf(f, " %d", i); 3189 } 3190 } 3191 } 3192 life /= DEAD_ARG; 3193 if (life) { 3194 ne_fprintf(f, " dead:"); 3195 for (i = 0; life; ++i, life >>= 1) { 3196 if (life & 1) { 3197 ne_fprintf(f, " %d", i); 3198 } 3199 } 3200 } 3201 } 3202 3203 if (have_prefs) { 3204 for (i = 0; i < nb_oargs; ++i) { 3205 TCGRegSet set = output_pref(op, i); 3206 3207 if (i == 0) { 3208 ne_fprintf(f, " pref="); 3209 } else { 3210 ne_fprintf(f, ","); 3211 } 3212 if (set == 0) { 3213 ne_fprintf(f, "none"); 3214 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3215 ne_fprintf(f, "all"); 3216 #ifdef CONFIG_DEBUG_TCG 3217 } else if (tcg_regset_single(set)) { 3218 TCGReg reg = tcg_regset_first(set); 3219 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3220 #endif 3221 } else if (TCG_TARGET_NB_REGS <= 32) { 3222 ne_fprintf(f, "0x%x", (uint32_t)set); 3223 } else { 3224 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3225 } 3226 } 3227 } 3228 3229 putc('\n', f); 3230 } 3231 } 3232 3233 /* we give more priority to constraints with less registers */ 3234 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3235 { 3236 int n; 3237 3238 arg_ct += k; 3239 n = ctpop64(arg_ct->regs); 3240 3241 /* 3242 * Sort constraints of a single register first, which includes output 3243 * aliases (which must exactly match the input already allocated). 3244 */ 3245 if (n == 1 || arg_ct->oalias) { 3246 return INT_MAX; 3247 } 3248 3249 /* 3250 * Sort register pairs next, first then second immediately after. 3251 * Arbitrarily sort multiple pairs by the index of the first reg; 3252 * there shouldn't be many pairs. 3253 */ 3254 switch (arg_ct->pair) { 3255 case 1: 3256 case 3: 3257 return (k + 1) * 2; 3258 case 2: 3259 return (arg_ct->pair_index + 1) * 2 - 1; 3260 } 3261 3262 /* Finally, sort by decreasing register count. 
*/ 3263 assert(n > 1); 3264 return -n; 3265 } 3266 3267 /* sort from highest priority to lowest */ 3268 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3269 { 3270 int i, j; 3271 3272 for (i = 0; i < n; i++) { 3273 a[start + i].sort_index = start + i; 3274 } 3275 if (n <= 1) { 3276 return; 3277 } 3278 for (i = 0; i < n - 1; i++) { 3279 for (j = i + 1; j < n; j++) { 3280 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3281 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3282 if (p1 < p2) { 3283 int tmp = a[start + i].sort_index; 3284 a[start + i].sort_index = a[start + j].sort_index; 3285 a[start + j].sort_index = tmp; 3286 } 3287 } 3288 } 3289 } 3290 3291 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3292 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3293 3294 static void process_constraint_sets(void) 3295 { 3296 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3297 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3298 TCGArgConstraint *args_ct = all_cts[c]; 3299 int nb_oargs = tdefs->nb_oargs; 3300 int nb_iargs = tdefs->nb_iargs; 3301 int nb_args = nb_oargs + nb_iargs; 3302 bool saw_alias_pair = false; 3303 3304 for (int i = 0; i < nb_args; i++) { 3305 const char *ct_str = tdefs->args_ct_str[i]; 3306 bool input_p = i >= nb_oargs; 3307 int o; 3308 3309 switch (*ct_str) { 3310 case '0' ... '9': 3311 o = *ct_str - '0'; 3312 tcg_debug_assert(input_p); 3313 tcg_debug_assert(o < nb_oargs); 3314 tcg_debug_assert(args_ct[o].regs != 0); 3315 tcg_debug_assert(!args_ct[o].oalias); 3316 args_ct[i] = args_ct[o]; 3317 /* The output sets oalias. */ 3318 args_ct[o].oalias = 1; 3319 args_ct[o].alias_index = i; 3320 /* The input sets ialias. */ 3321 args_ct[i].ialias = 1; 3322 args_ct[i].alias_index = o; 3323 if (args_ct[i].pair) { 3324 saw_alias_pair = true; 3325 } 3326 tcg_debug_assert(ct_str[1] == '\0'); 3327 continue; 3328 3329 case '&': 3330 tcg_debug_assert(!input_p); 3331 args_ct[i].newreg = true; 3332 ct_str++; 3333 break; 3334 3335 case 'p': /* plus */ 3336 /* Allocate to the register after the previous. */ 3337 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3338 o = i - 1; 3339 tcg_debug_assert(!args_ct[o].pair); 3340 tcg_debug_assert(!args_ct[o].ct); 3341 args_ct[i] = (TCGArgConstraint){ 3342 .pair = 2, 3343 .pair_index = o, 3344 .regs = args_ct[o].regs << 1, 3345 .newreg = args_ct[o].newreg, 3346 }; 3347 args_ct[o].pair = 1; 3348 args_ct[o].pair_index = i; 3349 tcg_debug_assert(ct_str[1] == '\0'); 3350 continue; 3351 3352 case 'm': /* minus */ 3353 /* Allocate to the register before the previous. */ 3354 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3355 o = i - 1; 3356 tcg_debug_assert(!args_ct[o].pair); 3357 tcg_debug_assert(!args_ct[o].ct); 3358 args_ct[i] = (TCGArgConstraint){ 3359 .pair = 1, 3360 .pair_index = o, 3361 .regs = args_ct[o].regs >> 1, 3362 .newreg = args_ct[o].newreg, 3363 }; 3364 args_ct[o].pair = 2; 3365 args_ct[o].pair_index = i; 3366 tcg_debug_assert(ct_str[1] == '\0'); 3367 continue; 3368 } 3369 3370 do { 3371 switch (*ct_str) { 3372 case 'i': 3373 args_ct[i].ct |= TCG_CT_CONST; 3374 break; 3375 #ifdef TCG_REG_ZERO 3376 case 'z': 3377 args_ct[i].ct |= TCG_CT_REG_ZERO; 3378 break; 3379 #endif 3380 3381 /* Include all of the target-specific constraints. 
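Each backend's tcg-target-con-str.h supplies one REGS() or CONST() entry per constraint letter, e.g. (entry names illustrative of a typical backend) REGS('r', ALL_GENERAL_REGS) and CONST('I', TCG_CT_CONST_S32); they expand into the case labels below.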
*/ 3382 3383 #undef CONST 3384 #define CONST(CASE, MASK) \ 3385 case CASE: args_ct[i].ct |= MASK; break; 3386 #define REGS(CASE, MASK) \ 3387 case CASE: args_ct[i].regs |= MASK; break; 3388 3389 #include "tcg-target-con-str.h" 3390 3391 #undef REGS 3392 #undef CONST 3393 default: 3394 case '0' ... '9': 3395 case '&': 3396 case 'p': 3397 case 'm': 3398 /* Typo in TCGConstraintSet constraint. */ 3399 g_assert_not_reached(); 3400 } 3401 } while (*++ct_str != '\0'); 3402 } 3403 3404 /* 3405 * Fix up output pairs that are aliased with inputs. 3406 * When we created the alias, we copied pair from the output. 3407 * There are three cases: 3408 * (1a) Pairs of inputs alias pairs of outputs. 3409 * (1b) One input aliases the first of a pair of outputs. 3410 * (2) One input aliases the second of a pair of outputs. 3411 * 3412 * Case 1a is handled by making sure that the pair_index'es are 3413 * properly updated so that they appear the same as a pair of inputs. 3414 * 3415 * Case 1b is handled by setting the pair_index of the input to 3416 * itself, simply so it doesn't point to an unrelated argument. 3417 * Since we don't encounter the "second" during the input allocation 3418 * phase, nothing happens with the second half of the input pair. 3419 * 3420 * Case 2 is handled by setting the second input to pair=3, the 3421 * first output to pair=3, and the pair_index'es to match. 3422 */ 3423 if (saw_alias_pair) { 3424 for (int i = nb_oargs; i < nb_args; i++) { 3425 int o, o2, i2; 3426 3427 /* 3428 * Since [0-9pm] must be alone in the constraint string, 3429 * the only way they can both be set is if the pair comes 3430 * from the output alias. 3431 */ 3432 if (!args_ct[i].ialias) { 3433 continue; 3434 } 3435 switch (args_ct[i].pair) { 3436 case 0: 3437 break; 3438 case 1: 3439 o = args_ct[i].alias_index; 3440 o2 = args_ct[o].pair_index; 3441 tcg_debug_assert(args_ct[o].pair == 1); 3442 tcg_debug_assert(args_ct[o2].pair == 2); 3443 if (args_ct[o2].oalias) { 3444 /* Case 1a */ 3445 i2 = args_ct[o2].alias_index; 3446 tcg_debug_assert(args_ct[i2].pair == 2); 3447 args_ct[i2].pair_index = i; 3448 args_ct[i].pair_index = i2; 3449 } else { 3450 /* Case 1b */ 3451 args_ct[i].pair_index = i; 3452 } 3453 break; 3454 case 2: 3455 o = args_ct[i].alias_index; 3456 o2 = args_ct[o].pair_index; 3457 tcg_debug_assert(args_ct[o].pair == 2); 3458 tcg_debug_assert(args_ct[o2].pair == 1); 3459 if (args_ct[o2].oalias) { 3460 /* Case 1a */ 3461 i2 = args_ct[o2].alias_index; 3462 tcg_debug_assert(args_ct[i2].pair == 1); 3463 args_ct[i2].pair_index = i; 3464 args_ct[i].pair_index = i2; 3465 } else { 3466 /* Case 2 */ 3467 args_ct[i].pair = 3; 3468 args_ct[o2].pair = 3; 3469 args_ct[i].pair_index = o2; 3470 args_ct[o2].pair_index = i; 3471 } 3472 break; 3473 default: 3474 g_assert_not_reached(); 3475 } 3476 } 3477 } 3478 3479 /* sort the constraints (XXX: this is just a heuristic) */ 3480 sort_constraints(args_ct, 0, nb_oargs); 3481 sort_constraints(args_ct, nb_oargs, nb_iargs); 3482 } 3483 } 3484 3485 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3486 { 3487 TCGOpcode opc = op->opc; 3488 TCGType type = TCGOP_TYPE(op); 3489 unsigned flags = TCGOP_FLAGS(op); 3490 const TCGOpDef *def = &tcg_op_defs[opc]; 3491 const TCGOutOp *outop = all_outop[opc]; 3492 TCGConstraintSetIndex con_set; 3493 3494 if (def->flags & TCG_OPF_NOT_PRESENT) { 3495 return empty_cts; 3496 } 3497 3498 if (outop) { 3499 con_set = outop->static_constraint; 3500 if (con_set == C_Dynamic) { 3501 con_set = outop->dynamic_constraint(type, flags);
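/* An op that reaches register allocation is supported, so the hook must yield a concrete set here; the asserts below check this. */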
3502 } 3503 } else { 3504 con_set = tcg_target_op_def(opc, type, flags); 3505 } 3506 tcg_debug_assert(con_set >= 0); 3507 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3508 3509 /* The constraint arguments must match TCGOpcode arguments. */ 3510 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3511 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3512 3513 return all_cts[con_set]; 3514 } 3515 3516 static void remove_label_use(TCGOp *op, int idx) 3517 { 3518 TCGLabel *label = arg_label(op->args[idx]); 3519 TCGLabelUse *use; 3520 3521 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3522 if (use->op == op) { 3523 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3524 return; 3525 } 3526 } 3527 g_assert_not_reached(); 3528 } 3529 3530 void tcg_op_remove(TCGContext *s, TCGOp *op) 3531 { 3532 switch (op->opc) { 3533 case INDEX_op_br: 3534 remove_label_use(op, 0); 3535 break; 3536 case INDEX_op_brcond: 3537 remove_label_use(op, 3); 3538 break; 3539 case INDEX_op_brcond2_i32: 3540 remove_label_use(op, 5); 3541 break; 3542 default: 3543 break; 3544 } 3545 3546 QTAILQ_REMOVE(&s->ops, op, link); 3547 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3548 s->nb_ops--; 3549 } 3550 3551 void tcg_remove_ops_after(TCGOp *op) 3552 { 3553 TCGContext *s = tcg_ctx; 3554 3555 while (true) { 3556 TCGOp *last = tcg_last_op(); 3557 if (last == op) { 3558 return; 3559 } 3560 tcg_op_remove(s, last); 3561 } 3562 } 3563 3564 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3565 { 3566 TCGContext *s = tcg_ctx; 3567 TCGOp *op = NULL; 3568 3569 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3570 QTAILQ_FOREACH(op, &s->free_ops, link) { 3571 if (nargs <= op->nargs) { 3572 QTAILQ_REMOVE(&s->free_ops, op, link); 3573 nargs = op->nargs; 3574 goto found; 3575 } 3576 } 3577 } 3578 3579 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3580 nargs = MAX(4, nargs); 3581 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3582 3583 found: 3584 memset(op, 0, offsetof(TCGOp, link)); 3585 op->opc = opc; 3586 op->nargs = nargs; 3587 3588 /* Check for bitfield overflow. 
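op->nargs is a narrow bitfield, so an oversized count would have been truncated by the assignment above; re-reading and comparing catches that.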
*/ 3589 tcg_debug_assert(op->nargs == nargs); 3590 3591 s->nb_ops++; 3592 return op; 3593 } 3594 3595 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3596 { 3597 TCGOp *op = tcg_op_alloc(opc, nargs); 3598 3599 if (tcg_ctx->emit_before_op) { 3600 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3601 } else { 3602 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3603 } 3604 return op; 3605 } 3606 3607 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3608 TCGOpcode opc, TCGType type, unsigned nargs) 3609 { 3610 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3611 3612 TCGOP_TYPE(new_op) = type; 3613 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3614 return new_op; 3615 } 3616 3617 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3618 TCGOpcode opc, TCGType type, unsigned nargs) 3619 { 3620 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3621 3622 TCGOP_TYPE(new_op) = type; 3623 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3624 return new_op; 3625 } 3626 3627 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3628 { 3629 TCGLabelUse *u; 3630 3631 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3632 TCGOp *op = u->op; 3633 switch (op->opc) { 3634 case INDEX_op_br: 3635 op->args[0] = label_arg(to); 3636 break; 3637 case INDEX_op_brcond: 3638 op->args[3] = label_arg(to); 3639 break; 3640 case INDEX_op_brcond2_i32: 3641 op->args[5] = label_arg(to); 3642 break; 3643 default: 3644 g_assert_not_reached(); 3645 } 3646 } 3647 3648 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3649 } 3650 3651 /* Reachability analysis: remove unreachable code. */ 3652 static void __attribute__((noinline)) 3653 reachable_code_pass(TCGContext *s) 3654 { 3655 TCGOp *op, *op_next, *op_prev; 3656 bool dead = false; 3657 3658 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3659 bool remove = dead; 3660 TCGLabel *label; 3661 3662 switch (op->opc) { 3663 case INDEX_op_set_label: 3664 label = arg_label(op->args[0]); 3665 3666 /* 3667 * Note that the first op in the TB is always a load, 3668 * so there is always something before a label. 3669 */ 3670 op_prev = QTAILQ_PREV(op, link); 3671 3672 /* 3673 * If we find two sequential labels, move all branches to 3674 * reference the second label and remove the first label. 3675 * Do this before the branch-to-next optimization, so that the 3676 * middle label is out of the way. 3677 */ 3678 if (op_prev->opc == INDEX_op_set_label) { 3679 move_label_uses(label, arg_label(op_prev->args[0])); 3680 tcg_op_remove(s, op_prev); 3681 op_prev = QTAILQ_PREV(op, link); 3682 } 3683 3684 /* 3685 * Optimization can fold conditional branches to unconditional. 3686 * If we find a label which is preceded by an unconditional 3687 * branch to next, remove the branch. We couldn't do this when 3688 * processing the branch because any dead code between the branch 3689 * and label had not yet been removed. 3690 */ 3691 if (op_prev->opc == INDEX_op_br && 3692 label == arg_label(op_prev->args[0])) { 3693 tcg_op_remove(s, op_prev); 3694 /* Fall through means insns become live again. */ 3695 dead = false; 3696 } 3697 3698 if (QSIMPLEQ_EMPTY(&label->branches)) { 3699 /* 3700 * While there is an occasional backward branch, virtually 3701 * all branches generated by the translators are forward. 3702 * Which means that generally we will have already removed 3703 * all references to a label that will be removed, and there 3704 * is little to be gained by iterating. 3705 */ 3706 remove = true; 3707 } else { 3708 /* Once we see a label, insns become live again.
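Control can still arrive here via the branches recorded in label->branches.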
*/ 3709 dead = false; 3710 remove = false; 3711 } 3712 break; 3713 3714 case INDEX_op_br: 3715 case INDEX_op_exit_tb: 3716 case INDEX_op_goto_ptr: 3717 /* Unconditional branches; everything following is dead. */ 3718 dead = true; 3719 break; 3720 3721 case INDEX_op_call: 3722 /* Notice noreturn helper calls, raising exceptions. */ 3723 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3724 dead = true; 3725 } 3726 break; 3727 3728 case INDEX_op_insn_start: 3729 /* Never remove -- we need to keep these for unwind. */ 3730 remove = false; 3731 break; 3732 3733 default: 3734 break; 3735 } 3736 3737 if (remove) { 3738 tcg_op_remove(s, op); 3739 } 3740 } 3741 } 3742 3743 #define TS_DEAD 1 3744 #define TS_MEM 2 3745 3746 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3747 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3748 3749 /* For liveness_pass_1, the register preferences for a given temp. */ 3750 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3751 { 3752 return ts->state_ptr; 3753 } 3754 3755 /* For liveness_pass_1, reset the preferences for a given temp to the 3756 * maximal regset for its type. 3757 */ 3758 static inline void la_reset_pref(TCGTemp *ts) 3759 { 3760 *la_temp_pref(ts) 3761 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3762 } 3763 3764 /* liveness analysis: end of function: all temps are dead, and globals 3765 should be in memory. */ 3766 static void la_func_end(TCGContext *s, int ng, int nt) 3767 { 3768 int i; 3769 3770 for (i = 0; i < ng; ++i) { 3771 s->temps[i].state = TS_DEAD | TS_MEM; 3772 la_reset_pref(&s->temps[i]); 3773 } 3774 for (i = ng; i < nt; ++i) { 3775 s->temps[i].state = TS_DEAD; 3776 la_reset_pref(&s->temps[i]); 3777 } 3778 } 3779 3780 /* liveness analysis: end of basic block: all temps are dead, globals 3781 and local temps should be in memory. */ 3782 static void la_bb_end(TCGContext *s, int ng, int nt) 3783 { 3784 int i; 3785 3786 for (i = 0; i < nt; ++i) { 3787 TCGTemp *ts = &s->temps[i]; 3788 int state; 3789 3790 switch (ts->kind) { 3791 case TEMP_FIXED: 3792 case TEMP_GLOBAL: 3793 case TEMP_TB: 3794 state = TS_DEAD | TS_MEM; 3795 break; 3796 case TEMP_EBB: 3797 case TEMP_CONST: 3798 state = TS_DEAD; 3799 break; 3800 default: 3801 g_assert_not_reached(); 3802 } 3803 ts->state = state; 3804 la_reset_pref(ts); 3805 } 3806 } 3807 3808 /* liveness analysis: sync globals back to memory. */ 3809 static void la_global_sync(TCGContext *s, int ng) 3810 { 3811 int i; 3812 3813 for (i = 0; i < ng; ++i) { 3814 int state = s->temps[i].state; 3815 s->temps[i].state = state | TS_MEM; 3816 if (state == TS_DEAD) { 3817 /* If the global was previously dead, reset prefs. */ 3818 la_reset_pref(&s->temps[i]); 3819 } 3820 } 3821 } 3822 3823 /* 3824 * liveness analysis: conditional branch: all temps are dead unless 3825 * explicitly live-across-conditional-branch, globals and local temps 3826 * should be synced. 3827 */ 3828 static void la_bb_sync(TCGContext *s, int ng, int nt) 3829 { 3830 la_global_sync(s, ng); 3831 3832 for (int i = ng; i < nt; ++i) { 3833 TCGTemp *ts = &s->temps[i]; 3834 int state; 3835 3836 switch (ts->kind) { 3837 case TEMP_TB: 3838 state = ts->state; 3839 ts->state = state | TS_MEM; 3840 if (state != TS_DEAD) { 3841 continue; 3842 } 3843 break; 3844 case TEMP_EBB: 3845 case TEMP_CONST: 3846 continue; 3847 default: 3848 g_assert_not_reached(); 3849 } 3850 la_reset_pref(&s->temps[i]); 3851 } 3852 } 3853 3854 /* liveness analysis: sync globals back to memory and kill. 
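Applied at helper calls that may read and write globals: the in-memory copy must be valid (the helper may read it) and register copies die (the helper may write it).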
*/ 3855 static void la_global_kill(TCGContext *s, int ng) 3856 { 3857 int i; 3858 3859 for (i = 0; i < ng; i++) { 3860 s->temps[i].state = TS_DEAD | TS_MEM; 3861 la_reset_pref(&s->temps[i]); 3862 } 3863 } 3864 3865 /* liveness analysis: note live globals crossing calls. */ 3866 static void la_cross_call(TCGContext *s, int nt) 3867 { 3868 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3869 int i; 3870 3871 for (i = 0; i < nt; i++) { 3872 TCGTemp *ts = &s->temps[i]; 3873 if (!(ts->state & TS_DEAD)) { 3874 TCGRegSet *pset = la_temp_pref(ts); 3875 TCGRegSet set = *pset; 3876 3877 set &= mask; 3878 /* If the combination is not possible, restart. */ 3879 if (set == 0) { 3880 set = tcg_target_available_regs[ts->type] & mask; 3881 } 3882 *pset = set; 3883 } 3884 } 3885 } 3886 3887 /* 3888 * Liveness analysis: verify the lifetime of TEMP_TB, and reduce 3889 * to TEMP_EBB where possible. 3890 */ 3891 static void __attribute__((noinline)) 3892 liveness_pass_0(TCGContext *s) 3893 { 3894 void * const multiple_ebb = (void *)(uintptr_t)-1; 3895 int nb_temps = s->nb_temps; 3896 TCGOp *op, *ebb; 3897 3898 for (int i = s->nb_globals; i < nb_temps; ++i) { 3899 s->temps[i].state_ptr = NULL; 3900 } 3901 3902 /* 3903 * Represent each EBB by the op at which it begins. In the case of 3904 * the first EBB, this is the first op, otherwise it is a label. 3905 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3906 * within a single EBB, else MULTIPLE_EBB. 3907 */ 3908 ebb = QTAILQ_FIRST(&s->ops); 3909 QTAILQ_FOREACH(op, &s->ops, link) { 3910 const TCGOpDef *def; 3911 int nb_oargs, nb_iargs; 3912 3913 switch (op->opc) { 3914 case INDEX_op_set_label: 3915 ebb = op; 3916 continue; 3917 case INDEX_op_discard: 3918 continue; 3919 case INDEX_op_call: 3920 nb_oargs = TCGOP_CALLO(op); 3921 nb_iargs = TCGOP_CALLI(op); 3922 break; 3923 default: 3924 def = &tcg_op_defs[op->opc]; 3925 nb_oargs = def->nb_oargs; 3926 nb_iargs = def->nb_iargs; 3927 break; 3928 } 3929 3930 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3931 TCGTemp *ts = arg_temp(op->args[i]); 3932 3933 if (ts->kind != TEMP_TB) { 3934 continue; 3935 } 3936 if (ts->state_ptr == NULL) { 3937 ts->state_ptr = ebb; 3938 } else if (ts->state_ptr != ebb) { 3939 ts->state_ptr = multiple_ebb; 3940 } 3941 } 3942 } 3943 3944 /* 3945 * For TEMP_TB that turned out not to be used beyond one EBB, 3946 * reduce the liveness to TEMP_EBB. 3947 */ 3948 for (int i = s->nb_globals; i < nb_temps; ++i) { 3949 TCGTemp *ts = &s->temps[i]; 3950 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3951 ts->kind = TEMP_EBB; 3952 } 3953 } 3954 } 3955 3956 static void assert_carry_dead(TCGContext *s) 3957 { 3958 /* 3959 * Carry operations can be separated by a few insns like mov, 3960 * load or store, but they should always be "close", and 3961 * carry-out operations should always be paired with carry-in. 3962 * At various boundaries, carry must have been consumed. 3963 */ 3964 tcg_debug_assert(!s->carry_live); 3965 } 3966 3967 /* Liveness analysis: update the opc_arg_life array to tell if a 3968 given input argument is dead. Instructions updating dead 3969 temporaries are removed. */ 3970 static void __attribute__((noinline)) 3971 liveness_pass_1(TCGContext *s) 3972 { 3973 int nb_globals = s->nb_globals; 3974 int nb_temps = s->nb_temps; 3975 TCGOp *op, *op_prev; 3976 TCGRegSet *prefs; 3977 3978 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3979 for (int i = 0; i < nb_temps; ++i) { 3980 s->temps[i].state_ptr = prefs + i; 3981 } 3982 3983 /* ???
Should be redundant with the exit_tb that ends the TB. */ 3984 la_func_end(s, nb_globals, nb_temps); 3985 3986 s->carry_live = false; 3987 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3988 int nb_iargs, nb_oargs; 3989 TCGOpcode opc_new, opc_new2; 3990 TCGLifeData arg_life = 0; 3991 TCGTemp *ts; 3992 TCGOpcode opc = op->opc; 3993 const TCGOpDef *def; 3994 const TCGArgConstraint *args_ct; 3995 3996 switch (opc) { 3997 case INDEX_op_call: 3998 assert_carry_dead(s); 3999 { 4000 const TCGHelperInfo *info = tcg_call_info(op); 4001 int call_flags = tcg_call_flags(op); 4002 4003 nb_oargs = TCGOP_CALLO(op); 4004 nb_iargs = TCGOP_CALLI(op); 4005 4006 /* pure functions can be removed if their result is unused */ 4007 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 4008 for (int i = 0; i < nb_oargs; i++) { 4009 ts = arg_temp(op->args[i]); 4010 if (ts->state != TS_DEAD) { 4011 goto do_not_remove_call; 4012 } 4013 } 4014 goto do_remove; 4015 } 4016 do_not_remove_call: 4017 4018 /* Output args are dead. */ 4019 for (int i = 0; i < nb_oargs; i++) { 4020 ts = arg_temp(op->args[i]); 4021 if (ts->state & TS_DEAD) { 4022 arg_life |= DEAD_ARG << i; 4023 } 4024 if (ts->state & TS_MEM) { 4025 arg_life |= SYNC_ARG << i; 4026 } 4027 ts->state = TS_DEAD; 4028 la_reset_pref(ts); 4029 } 4030 4031 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 4032 memset(op->output_pref, 0, sizeof(op->output_pref)); 4033 4034 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 4035 TCG_CALL_NO_READ_GLOBALS))) { 4036 la_global_kill(s, nb_globals); 4037 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 4038 la_global_sync(s, nb_globals); 4039 } 4040 4041 /* Record arguments that die in this helper. */ 4042 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4043 ts = arg_temp(op->args[i]); 4044 if (ts->state & TS_DEAD) { 4045 arg_life |= DEAD_ARG << i; 4046 } 4047 } 4048 4049 /* For all live registers, remove call-clobbered prefs. */ 4050 la_cross_call(s, nb_temps); 4051 4052 /* 4053 * Input arguments are live for preceding opcodes. 4054 * 4055 * For those arguments that die, and will be allocated in 4056 * registers, clear the register set for that arg, to be 4057 * filled in below. For args that will be on the stack, 4058 * reset to any available reg. Process arguments in reverse 4059 * order so that if a temp is used more than once, the stack 4060 * reset to max happens before the register reset to 0. 4061 */ 4062 for (int i = nb_iargs - 1; i >= 0; i--) { 4063 const TCGCallArgumentLoc *loc = &info->in[i]; 4064 ts = arg_temp(op->args[nb_oargs + i]); 4065 4066 if (ts->state & TS_DEAD) { 4067 switch (loc->kind) { 4068 case TCG_CALL_ARG_NORMAL: 4069 case TCG_CALL_ARG_EXTEND_U: 4070 case TCG_CALL_ARG_EXTEND_S: 4071 if (arg_slot_reg_p(loc->arg_slot)) { 4072 *la_temp_pref(ts) = 0; 4073 break; 4074 } 4075 /* fall through */ 4076 default: 4077 *la_temp_pref(ts) = 4078 tcg_target_available_regs[ts->type]; 4079 break; 4080 } 4081 ts->state &= ~TS_DEAD; 4082 } 4083 } 4084 4085 /* 4086 * For each input argument, add its input register to prefs. 4087 * If a temp is used once, this produces a single set bit; 4088 * if a temp is used multiple times, this produces a set. 
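             *
             * For example, a temp passed as two different register
             * arguments of the same call accumulates both argument
             * registers here, so whichever one the allocator picks
             * later saves at least one move.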
4089 */ 4090 for (int i = 0; i < nb_iargs; i++) { 4091 const TCGCallArgumentLoc *loc = &info->in[i]; 4092 ts = arg_temp(op->args[nb_oargs + i]); 4093 4094 switch (loc->kind) { 4095 case TCG_CALL_ARG_NORMAL: 4096 case TCG_CALL_ARG_EXTEND_U: 4097 case TCG_CALL_ARG_EXTEND_S: 4098 if (arg_slot_reg_p(loc->arg_slot)) { 4099 tcg_regset_set_reg(*la_temp_pref(ts), 4100 tcg_target_call_iarg_regs[loc->arg_slot]); 4101 } 4102 break; 4103 default: 4104 break; 4105 } 4106 } 4107 } 4108 break; 4109 case INDEX_op_insn_start: 4110 assert_carry_dead(s); 4111 break; 4112 case INDEX_op_discard: 4113 /* mark the temporary as dead */ 4114 ts = arg_temp(op->args[0]); 4115 ts->state = TS_DEAD; 4116 la_reset_pref(ts); 4117 break; 4118 4119 case INDEX_op_muls2: 4120 opc_new = INDEX_op_mul; 4121 opc_new2 = INDEX_op_mulsh; 4122 goto do_mul2; 4123 case INDEX_op_mulu2: 4124 opc_new = INDEX_op_mul; 4125 opc_new2 = INDEX_op_muluh; 4126 do_mul2: 4127 assert_carry_dead(s); 4128 if (arg_temp(op->args[1])->state == TS_DEAD) { 4129 if (arg_temp(op->args[0])->state == TS_DEAD) { 4130 /* Both parts of the operation are dead. */ 4131 goto do_remove; 4132 } 4133 /* The high part of the operation is dead; generate the low. */ 4134 op->opc = opc = opc_new; 4135 op->args[1] = op->args[2]; 4136 op->args[2] = op->args[3]; 4137 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4138 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4139 /* The low part of the operation is dead; generate the high. */ 4140 op->opc = opc = opc_new2; 4141 op->args[0] = op->args[1]; 4142 op->args[1] = op->args[2]; 4143 op->args[2] = op->args[3]; 4144 } else { 4145 goto do_not_remove; 4146 } 4147 /* Mark the single-word operation live. */ 4148 goto do_not_remove; 4149 4150 case INDEX_op_addco: 4151 if (s->carry_live) { 4152 goto do_not_remove; 4153 } 4154 op->opc = opc = INDEX_op_add; 4155 goto do_default; 4156 4157 case INDEX_op_addcio: 4158 if (s->carry_live) { 4159 goto do_not_remove; 4160 } 4161 op->opc = opc = INDEX_op_addci; 4162 goto do_default; 4163 4164 case INDEX_op_subbo: 4165 if (s->carry_live) { 4166 goto do_not_remove; 4167 } 4168 /* Lower to sub, but this may also require canonicalization. */ 4169 op->opc = opc = INDEX_op_sub; 4170 ts = arg_temp(op->args[2]); 4171 if (ts->kind == TEMP_CONST) { 4172 ts = tcg_constant_internal(ts->type, -ts->val); 4173 if (ts->state_ptr == NULL) { 4174 tcg_debug_assert(temp_idx(ts) == nb_temps); 4175 nb_temps++; 4176 ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); 4177 ts->state = TS_DEAD; 4178 la_reset_pref(ts); 4179 } 4180 op->args[2] = temp_arg(ts); 4181 op->opc = opc = INDEX_op_add; 4182 } 4183 goto do_default; 4184 4185 case INDEX_op_subbio: 4186 if (s->carry_live) { 4187 goto do_not_remove; 4188 } 4189 op->opc = opc = INDEX_op_subbi; 4190 goto do_default; 4191 4192 case INDEX_op_addc1o: 4193 if (s->carry_live) { 4194 goto do_not_remove; 4195 } 4196 /* Lower to add, add +1. */ 4197 op_prev = tcg_op_insert_before(s, op, INDEX_op_add, 4198 TCGOP_TYPE(op), 3); 4199 op_prev->args[0] = op->args[0]; 4200 op_prev->args[1] = op->args[1]; 4201 op_prev->args[2] = op->args[2]; 4202 op->opc = opc = INDEX_op_add; 4203 op->args[1] = op->args[0]; 4204 ts = arg_temp(op->args[0]); 4205 ts = tcg_constant_internal(ts->type, 1); 4206 op->args[2] = temp_arg(ts); 4207 goto do_default; 4208 4209 case INDEX_op_subb1o: 4210 if (s->carry_live) { 4211 goto do_not_remove; 4212 } 4213 /* Lower to sub, add -1. 
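           With the borrow dead, this computes
           args[0] = args[1] - args[2] - 1, i.e. a subtract whose
           borrow-in is known to be set and whose borrow-out is unused.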
*/ 4214 op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, 4215 TCGOP_TYPE(op), 3); 4216 op_prev->args[0] = op->args[0]; 4217 op_prev->args[1] = op->args[1]; 4218 op_prev->args[2] = op->args[2]; 4219 op->opc = opc = INDEX_op_add; 4220 op->args[1] = op->args[0]; 4221 ts = arg_temp(op->args[0]); 4222 ts = tcg_constant_internal(ts->type, -1); 4223 op->args[2] = temp_arg(ts); 4224 goto do_default; 4225 4226 default: 4227 do_default: 4228 /* 4229 * Test if the operation can be removed because all 4230 * its outputs are dead. We assume that nb_oargs == 0 4231 * implies side effects. 4232 */ 4233 def = &tcg_op_defs[opc]; 4234 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { 4235 for (int i = def->nb_oargs - 1; i >= 0; i--) { 4236 if (arg_temp(op->args[i])->state != TS_DEAD) { 4237 goto do_not_remove; 4238 } 4239 } 4240 goto do_remove; 4241 } 4242 goto do_not_remove; 4243 4244 do_remove: 4245 tcg_op_remove(s, op); 4246 break; 4247 4248 do_not_remove: 4249 def = &tcg_op_defs[opc]; 4250 nb_iargs = def->nb_iargs; 4251 nb_oargs = def->nb_oargs; 4252 4253 for (int i = 0; i < nb_oargs; i++) { 4254 ts = arg_temp(op->args[i]); 4255 4256 /* Remember the preference of the uses that followed. */ 4257 if (i < ARRAY_SIZE(op->output_pref)) { 4258 op->output_pref[i] = *la_temp_pref(ts); 4259 } 4260 4261 /* Output args are dead. */ 4262 if (ts->state & TS_DEAD) { 4263 arg_life |= DEAD_ARG << i; 4264 } 4265 if (ts->state & TS_MEM) { 4266 arg_life |= SYNC_ARG << i; 4267 } 4268 ts->state = TS_DEAD; 4269 la_reset_pref(ts); 4270 } 4271 4272 /* If end of basic block, update. */ 4273 if (def->flags & TCG_OPF_BB_EXIT) { 4274 assert_carry_dead(s); 4275 la_func_end(s, nb_globals, nb_temps); 4276 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4277 assert_carry_dead(s); 4278 la_bb_sync(s, nb_globals, nb_temps); 4279 } else if (def->flags & TCG_OPF_BB_END) { 4280 assert_carry_dead(s); 4281 la_bb_end(s, nb_globals, nb_temps); 4282 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4283 assert_carry_dead(s); 4284 la_global_sync(s, nb_globals); 4285 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4286 la_cross_call(s, nb_temps); 4287 } 4288 } 4289 4290 /* Record arguments that die in this opcode. */ 4291 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4292 ts = arg_temp(op->args[i]); 4293 if (ts->state & TS_DEAD) { 4294 arg_life |= DEAD_ARG << i; 4295 } 4296 } 4297 if (def->flags & TCG_OPF_CARRY_OUT) { 4298 s->carry_live = false; 4299 } 4300 4301 /* Input arguments are live for preceding opcodes. */ 4302 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4303 ts = arg_temp(op->args[i]); 4304 if (ts->state & TS_DEAD) { 4305 /* For operands that were dead, initially allow 4306 all regs for the type. */ 4307 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4308 ts->state &= ~TS_DEAD; 4309 } 4310 } 4311 if (def->flags & TCG_OPF_CARRY_IN) { 4312 s->carry_live = true; 4313 } 4314 4315 /* Incorporate constraints for this operand. */ 4316 switch (opc) { 4317 case INDEX_op_mov: 4318 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4319 have proper constraints. That said, special case 4320 moves to propagate preferences backward. 
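               Since this pass walks the ops in reverse, for "mov d, s"
               with a dead input this copies d's preferred registers to
               s, making it more likely that both are allocated to the
               same register and the mov is elided.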
*/ 4321 if (IS_DEAD_ARG(1)) { 4322 *la_temp_pref(arg_temp(op->args[0])) 4323 = *la_temp_pref(arg_temp(op->args[1])); 4324 } 4325 break; 4326 4327 default: 4328 args_ct = opcode_args_ct(op); 4329 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4330 const TCGArgConstraint *ct = &args_ct[i]; 4331 TCGRegSet set, *pset; 4332 4333 ts = arg_temp(op->args[i]); 4334 pset = la_temp_pref(ts); 4335 set = *pset; 4336 4337 set &= ct->regs; 4338 if (ct->ialias) { 4339 set &= output_pref(op, ct->alias_index); 4340 } 4341 /* If the combination is not possible, restart. */ 4342 if (set == 0) { 4343 set = ct->regs; 4344 } 4345 *pset = set; 4346 } 4347 break; 4348 } 4349 break; 4350 } 4351 op->life = arg_life; 4352 } 4353 assert_carry_dead(s); 4354 } 4355 4356 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4357 static bool __attribute__((noinline)) 4358 liveness_pass_2(TCGContext *s) 4359 { 4360 int nb_globals = s->nb_globals; 4361 int nb_temps, i; 4362 bool changes = false; 4363 TCGOp *op, *op_next; 4364 4365 /* Create a temporary for each indirect global. */ 4366 for (i = 0; i < nb_globals; ++i) { 4367 TCGTemp *its = &s->temps[i]; 4368 if (its->indirect_reg) { 4369 TCGTemp *dts = tcg_temp_alloc(s); 4370 dts->type = its->type; 4371 dts->base_type = its->base_type; 4372 dts->temp_subindex = its->temp_subindex; 4373 dts->kind = TEMP_EBB; 4374 its->state_ptr = dts; 4375 } else { 4376 its->state_ptr = NULL; 4377 } 4378 /* All globals begin dead. */ 4379 its->state = TS_DEAD; 4380 } 4381 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4382 TCGTemp *its = &s->temps[i]; 4383 its->state_ptr = NULL; 4384 its->state = TS_DEAD; 4385 } 4386 4387 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4388 TCGOpcode opc = op->opc; 4389 const TCGOpDef *def = &tcg_op_defs[opc]; 4390 TCGLifeData arg_life = op->life; 4391 int nb_iargs, nb_oargs, call_flags; 4392 TCGTemp *arg_ts, *dir_ts; 4393 4394 if (opc == INDEX_op_call) { 4395 nb_oargs = TCGOP_CALLO(op); 4396 nb_iargs = TCGOP_CALLI(op); 4397 call_flags = tcg_call_flags(op); 4398 } else { 4399 nb_iargs = def->nb_iargs; 4400 nb_oargs = def->nb_oargs; 4401 4402 /* Set flags similar to how calls require. */ 4403 if (def->flags & TCG_OPF_COND_BRANCH) { 4404 /* Like reading globals: sync_globals */ 4405 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4406 } else if (def->flags & TCG_OPF_BB_END) { 4407 /* Like writing globals: save_globals */ 4408 call_flags = 0; 4409 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4410 /* Like reading globals: sync_globals */ 4411 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4412 } else { 4413 /* No effect on globals. */ 4414 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4415 TCG_CALL_NO_WRITE_GLOBALS); 4416 } 4417 } 4418 4419 /* Make sure that input arguments are available. */ 4420 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4421 arg_ts = arg_temp(op->args[i]); 4422 dir_ts = arg_ts->state_ptr; 4423 if (dir_ts && arg_ts->state == TS_DEAD) { 4424 TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld, 4425 arg_ts->type, 3); 4426 4427 lop->args[0] = temp_arg(dir_ts); 4428 lop->args[1] = temp_arg(arg_ts->mem_base); 4429 lop->args[2] = arg_ts->mem_offset; 4430 4431 /* Loaded, but synced with memory. */ 4432 arg_ts->state = TS_MEM; 4433 } 4434 } 4435 4436 /* Perform input replacement, and mark inputs that became dead. 4437 No action is required except keeping temp_state up to date 4438 so that we reload when needed. 
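       For example, once an input's last use marks its indirect global
       TS_DEAD here, a subsequent read of that global will find
       state == TS_DEAD in the loop above and insert a fresh ld from
       the memory slot.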
*/ 4439 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4440 arg_ts = arg_temp(op->args[i]); 4441 dir_ts = arg_ts->state_ptr; 4442 if (dir_ts) { 4443 op->args[i] = temp_arg(dir_ts); 4444 changes = true; 4445 if (IS_DEAD_ARG(i)) { 4446 arg_ts->state = TS_DEAD; 4447 } 4448 } 4449 } 4450 4451 /* Liveness analysis should ensure that the following are 4452 all correct, for call sites and basic block end points. */ 4453 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4454 /* Nothing to do */ 4455 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4456 for (i = 0; i < nb_globals; ++i) { 4457 /* Liveness should see that globals are synced back, 4458 that is, either TS_DEAD or TS_MEM. */ 4459 arg_ts = &s->temps[i]; 4460 tcg_debug_assert(arg_ts->state_ptr == 0 4461 || arg_ts->state != 0); 4462 } 4463 } else { 4464 for (i = 0; i < nb_globals; ++i) { 4465 /* Liveness should see that globals are saved back, 4466 that is, TS_DEAD, waiting to be reloaded. */ 4467 arg_ts = &s->temps[i]; 4468 tcg_debug_assert(arg_ts->state_ptr == 0 4469 || arg_ts->state == TS_DEAD); 4470 } 4471 } 4472 4473 /* Outputs become available. */ 4474 if (opc == INDEX_op_mov) { 4475 arg_ts = arg_temp(op->args[0]); 4476 dir_ts = arg_ts->state_ptr; 4477 if (dir_ts) { 4478 op->args[0] = temp_arg(dir_ts); 4479 changes = true; 4480 4481 /* The output is now live and modified. */ 4482 arg_ts->state = 0; 4483 4484 if (NEED_SYNC_ARG(0)) { 4485 TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, 4486 arg_ts->type, 3); 4487 TCGTemp *out_ts = dir_ts; 4488 4489 if (IS_DEAD_ARG(0)) { 4490 out_ts = arg_temp(op->args[1]); 4491 arg_ts->state = TS_DEAD; 4492 tcg_op_remove(s, op); 4493 } else { 4494 arg_ts->state = TS_MEM; 4495 } 4496 4497 sop->args[0] = temp_arg(out_ts); 4498 sop->args[1] = temp_arg(arg_ts->mem_base); 4499 sop->args[2] = arg_ts->mem_offset; 4500 } else { 4501 tcg_debug_assert(!IS_DEAD_ARG(0)); 4502 } 4503 } 4504 } else { 4505 for (i = 0; i < nb_oargs; i++) { 4506 arg_ts = arg_temp(op->args[i]); 4507 dir_ts = arg_ts->state_ptr; 4508 if (!dir_ts) { 4509 continue; 4510 } 4511 op->args[i] = temp_arg(dir_ts); 4512 changes = true; 4513 4514 /* The output is now live and modified. */ 4515 arg_ts->state = 0; 4516 4517 /* Sync outputs upon their last write. */ 4518 if (NEED_SYNC_ARG(i)) { 4519 TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, 4520 arg_ts->type, 3); 4521 4522 sop->args[0] = temp_arg(dir_ts); 4523 sop->args[1] = temp_arg(arg_ts->mem_base); 4524 sop->args[2] = arg_ts->mem_offset; 4525 4526 arg_ts->state = TS_MEM; 4527 } 4528 /* Drop outputs that are dead. */ 4529 if (IS_DEAD_ARG(i)) { 4530 arg_ts->state = TS_DEAD; 4531 } 4532 } 4533 } 4534 } 4535 4536 return changes; 4537 } 4538 4539 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4540 { 4541 intptr_t off; 4542 int size, align; 4543 4544 /* When allocating an object, look at the full type. */ 4545 size = tcg_type_size(ts->base_type); 4546 switch (ts->base_type) { 4547 case TCG_TYPE_I32: 4548 align = 4; 4549 break; 4550 case TCG_TYPE_I64: 4551 case TCG_TYPE_V64: 4552 align = 8; 4553 break; 4554 case TCG_TYPE_I128: 4555 case TCG_TYPE_V128: 4556 case TCG_TYPE_V256: 4557 /* 4558 * Note that we do not require aligned storage for V256, 4559 * and that we provide alignment for I128 to match V128, 4560 * even if that's above what the host ABI requires. 4561 */ 4562 align = 16; 4563 break; 4564 default: 4565 g_assert_not_reached(); 4566 } 4567 4568 /* 4569 * Assume the stack is sufficiently aligned. 4570 * This affects e.g. 
ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   If 'free_or_dead' is non-zero, subsequently release the temporary;
   if it is positive, the temp is dead; if it is negative, the temp is free.
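   E.g. tcg_reg_free below calls temp_sync(s, ts, allocated_regs, 0, -1):
   write the value back to its memory slot, then release the register
   while the contents stay valid in memory.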
*/ 4674 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4675 TCGRegSet preferred_regs, int free_or_dead) 4676 { 4677 if (!temp_readonly(ts) && !ts->mem_coherent) { 4678 if (!ts->mem_allocated) { 4679 temp_allocate_frame(s, ts); 4680 } 4681 switch (ts->val_type) { 4682 case TEMP_VAL_CONST: 4683 /* If we're going to free the temp immediately, then we won't 4684 require it later in a register, so attempt to store the 4685 constant to memory directly. */ 4686 if (free_or_dead 4687 && tcg_out_sti(s, ts->type, ts->val, 4688 ts->mem_base->reg, ts->mem_offset)) { 4689 break; 4690 } 4691 temp_load(s, ts, tcg_target_available_regs[ts->type], 4692 allocated_regs, preferred_regs); 4693 /* fallthrough */ 4694 4695 case TEMP_VAL_REG: 4696 tcg_out_st(s, ts->type, ts->reg, 4697 ts->mem_base->reg, ts->mem_offset); 4698 break; 4699 4700 case TEMP_VAL_MEM: 4701 break; 4702 4703 case TEMP_VAL_DEAD: 4704 default: 4705 g_assert_not_reached(); 4706 } 4707 ts->mem_coherent = 1; 4708 } 4709 if (free_or_dead) { 4710 temp_free_or_dead(s, ts, free_or_dead); 4711 } 4712 } 4713 4714 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4715 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4716 { 4717 TCGTemp *ts = s->reg_to_temp[reg]; 4718 if (ts != NULL) { 4719 temp_sync(s, ts, allocated_regs, 0, -1); 4720 } 4721 } 4722 4723 /** 4724 * tcg_reg_alloc: 4725 * @required_regs: Set of registers in which we must allocate. 4726 * @allocated_regs: Set of registers which must be avoided. 4727 * @preferred_regs: Set of registers we should prefer. 4728 * @rev: True if we search the registers in "indirect" order. 4729 * 4730 * The allocated register must be in @required_regs & ~@allocated_regs, 4731 * but if we can put it in @preferred_regs we may save a move later. 4732 */ 4733 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4734 TCGRegSet allocated_regs, 4735 TCGRegSet preferred_regs, bool rev) 4736 { 4737 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4738 TCGRegSet reg_ct[2]; 4739 const int *order; 4740 4741 reg_ct[1] = required_regs & ~allocated_regs; 4742 tcg_debug_assert(reg_ct[1] != 0); 4743 reg_ct[0] = reg_ct[1] & preferred_regs; 4744 4745 /* Skip the preferred_regs option if it cannot be satisfied, 4746 or if the preference made no difference. */ 4747 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4748 4749 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4750 4751 /* Try free registers, preferences first. */ 4752 for (j = f; j < 2; j++) { 4753 TCGRegSet set = reg_ct[j]; 4754 4755 if (tcg_regset_single(set)) { 4756 /* One register in the set. */ 4757 TCGReg reg = tcg_regset_first(set); 4758 if (s->reg_to_temp[reg] == NULL) { 4759 return reg; 4760 } 4761 } else { 4762 for (i = 0; i < n; i++) { 4763 TCGReg reg = order[i]; 4764 if (s->reg_to_temp[reg] == NULL && 4765 tcg_regset_test_reg(set, reg)) { 4766 return reg; 4767 } 4768 } 4769 } 4770 } 4771 4772 /* We must spill something. */ 4773 for (j = f; j < 2; j++) { 4774 TCGRegSet set = reg_ct[j]; 4775 4776 if (tcg_regset_single(set)) { 4777 /* One register in the set. 
 */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    g_assert_not_reached();
}

static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * so do it generically here.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    assert_carry_dead(s);
    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    assert_carry_dead(s);
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_TB:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here.
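       The constant is merely recorded in the temp; it is materialized
       on demand, e.g. by temp_load at a later use, or by the temp_sync
       below if the value must be written to memory.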
*/ 4998 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4999 ots->val = val; 5000 ots->mem_coherent = 0; 5001 if (NEED_SYNC_ARG(0)) { 5002 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 5003 } else if (IS_DEAD_ARG(0)) { 5004 temp_dead(s, ots); 5005 } 5006 } 5007 5008 /* 5009 * Specialized code generation for INDEX_op_mov_*. 5010 */ 5011 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 5012 { 5013 const TCGLifeData arg_life = op->life; 5014 TCGRegSet allocated_regs, preferred_regs; 5015 TCGTemp *ts, *ots; 5016 TCGType otype, itype; 5017 TCGReg oreg, ireg; 5018 5019 allocated_regs = s->reserved_regs; 5020 preferred_regs = output_pref(op, 0); 5021 ots = arg_temp(op->args[0]); 5022 ts = arg_temp(op->args[1]); 5023 5024 /* ENV should not be modified. */ 5025 tcg_debug_assert(!temp_readonly(ots)); 5026 5027 /* Note that otype != itype for no-op truncation. */ 5028 otype = ots->type; 5029 itype = ts->type; 5030 5031 if (ts->val_type == TEMP_VAL_CONST) { 5032 /* propagate constant or generate sti */ 5033 tcg_target_ulong val = ts->val; 5034 if (IS_DEAD_ARG(1)) { 5035 temp_dead(s, ts); 5036 } 5037 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 5038 return; 5039 } 5040 5041 /* If the source value is in memory we're going to be forced 5042 to have it in a register in order to perform the copy. Copy 5043 the SOURCE value into its own register first, that way we 5044 don't have to reload SOURCE the next time it is used. */ 5045 if (ts->val_type == TEMP_VAL_MEM) { 5046 temp_load(s, ts, tcg_target_available_regs[itype], 5047 allocated_regs, preferred_regs); 5048 } 5049 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 5050 ireg = ts->reg; 5051 5052 if (IS_DEAD_ARG(0)) { 5053 /* mov to a non-saved dead register makes no sense (even with 5054 liveness analysis disabled). */ 5055 tcg_debug_assert(NEED_SYNC_ARG(0)); 5056 if (!ots->mem_allocated) { 5057 temp_allocate_frame(s, ots); 5058 } 5059 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 5060 if (IS_DEAD_ARG(1)) { 5061 temp_dead(s, ts); 5062 } 5063 temp_dead(s, ots); 5064 return; 5065 } 5066 5067 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 5068 /* 5069 * The mov can be suppressed. Kill input first, so that it 5070 * is unlinked from reg_to_temp, then set the output to the 5071 * reg that we saved from the input. 5072 */ 5073 temp_dead(s, ts); 5074 oreg = ireg; 5075 } else { 5076 if (ots->val_type == TEMP_VAL_REG) { 5077 oreg = ots->reg; 5078 } else { 5079 /* Make sure to not spill the input register during allocation. */ 5080 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 5081 allocated_regs | ((TCGRegSet)1 << ireg), 5082 preferred_regs, ots->indirect_base); 5083 } 5084 if (!tcg_out_mov(s, otype, oreg, ireg)) { 5085 /* 5086 * Cross register class move not supported. 5087 * Store the source register into the destination slot 5088 * and leave the destination temp as TEMP_VAL_MEM. 5089 */ 5090 assert(!temp_readonly(ots)); 5091 if (!ts->mem_allocated) { 5092 temp_allocate_frame(s, ots); 5093 } 5094 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 5095 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 5096 ots->mem_coherent = 1; 5097 return; 5098 } 5099 } 5100 set_temp_val_reg(s, ots, oreg); 5101 ots->mem_coherent = 0; 5102 5103 if (NEED_SYNC_ARG(0)) { 5104 temp_sync(s, ots, allocated_regs, 0, 0); 5105 } 5106 } 5107 5108 /* 5109 * Specialized code generation for INDEX_op_dup_vec. 
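 *
 * In decreasing order of preference: duplicate directly from the input
 * vector register, move across the integer/vector register classes,
 * duplicate from the input's memory slot, or load the input into the
 * output register and duplicate in place.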
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    const TCGArgConstraint *dup_args_ct;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_TYPE(op);

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi. */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_args_ct = opcode_args_ct(op);
    dup_out_regs = dup_args_ct[0].regs;
    dup_in_regs = dup_args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
*/ 5205 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5206 tcg_debug_assert(ok); 5207 5208 done: 5209 ots->mem_coherent = 0; 5210 if (IS_DEAD_ARG(1)) { 5211 temp_dead(s, its); 5212 } 5213 if (NEED_SYNC_ARG(0)) { 5214 temp_sync(s, ots, s->reserved_regs, 0, 0); 5215 } 5216 if (IS_DEAD_ARG(0)) { 5217 temp_dead(s, ots); 5218 } 5219 } 5220 5221 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5222 { 5223 const TCGLifeData arg_life = op->life; 5224 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5225 TCGRegSet i_allocated_regs; 5226 TCGRegSet o_allocated_regs; 5227 int i, k, nb_iargs, nb_oargs; 5228 TCGReg reg; 5229 TCGArg arg; 5230 const TCGArgConstraint *args_ct; 5231 const TCGArgConstraint *arg_ct; 5232 TCGTemp *ts; 5233 TCGArg new_args[TCG_MAX_OP_ARGS]; 5234 int const_args[TCG_MAX_OP_ARGS]; 5235 TCGCond op_cond; 5236 5237 if (def->flags & TCG_OPF_CARRY_IN) { 5238 tcg_debug_assert(s->carry_live); 5239 } 5240 5241 nb_oargs = def->nb_oargs; 5242 nb_iargs = def->nb_iargs; 5243 5244 /* copy constants */ 5245 memcpy(new_args + nb_oargs + nb_iargs, 5246 op->args + nb_oargs + nb_iargs, 5247 sizeof(TCGArg) * def->nb_cargs); 5248 5249 i_allocated_regs = s->reserved_regs; 5250 o_allocated_regs = s->reserved_regs; 5251 5252 switch (op->opc) { 5253 case INDEX_op_brcond: 5254 op_cond = op->args[2]; 5255 break; 5256 case INDEX_op_setcond: 5257 case INDEX_op_negsetcond: 5258 case INDEX_op_cmp_vec: 5259 op_cond = op->args[3]; 5260 break; 5261 case INDEX_op_brcond2_i32: 5262 op_cond = op->args[4]; 5263 break; 5264 case INDEX_op_movcond: 5265 case INDEX_op_setcond2_i32: 5266 case INDEX_op_cmpsel_vec: 5267 op_cond = op->args[5]; 5268 break; 5269 default: 5270 /* No condition within opcode. */ 5271 op_cond = TCG_COND_ALWAYS; 5272 break; 5273 } 5274 5275 args_ct = opcode_args_ct(op); 5276 5277 /* satisfy input constraints */ 5278 for (k = 0; k < nb_iargs; k++) { 5279 TCGRegSet i_preferred_regs, i_required_regs; 5280 bool allocate_new_reg, copyto_new_reg; 5281 TCGTemp *ts2; 5282 int i1, i2; 5283 5284 i = args_ct[nb_oargs + k].sort_index; 5285 arg = op->args[i]; 5286 arg_ct = &args_ct[i]; 5287 ts = arg_temp(arg); 5288 5289 if (ts->val_type == TEMP_VAL_CONST) { 5290 #ifdef TCG_REG_ZERO 5291 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5292 /* Hardware zero register: indicate register via non-const. */ 5293 const_args[i] = 0; 5294 new_args[i] = TCG_REG_ZERO; 5295 continue; 5296 } 5297 #endif 5298 5299 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5300 op_cond, TCGOP_VECE(op))) { 5301 /* constant is OK for instruction */ 5302 const_args[i] = 1; 5303 new_args[i] = ts->val; 5304 continue; 5305 } 5306 } 5307 5308 reg = ts->reg; 5309 i_preferred_regs = 0; 5310 i_required_regs = arg_ct->regs; 5311 allocate_new_reg = false; 5312 copyto_new_reg = false; 5313 5314 switch (arg_ct->pair) { 5315 case 0: /* not paired */ 5316 if (arg_ct->ialias) { 5317 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5318 5319 /* 5320 * If the input is readonly, then it cannot also be an 5321 * output and aliased to itself. If the input is not 5322 * dead after the instruction, we must allocate a new 5323 * register and move it. 5324 */ 5325 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5326 || args_ct[arg_ct->alias_index].newreg) { 5327 allocate_new_reg = true; 5328 } else if (ts->val_type == TEMP_VAL_REG) { 5329 /* 5330 * Check if the current register has already been 5331 * allocated for another input. 
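                 *
                 * This can happen when the same temp feeds two inputs
                 * of one op, e.g. "add t0, t1, t1" with an output
                 * alias: only one of the two uses may keep t1's
                 * current register in place.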
5332 */ 5333 allocate_new_reg = 5334 tcg_regset_test_reg(i_allocated_regs, reg); 5335 } 5336 } 5337 if (!allocate_new_reg) { 5338 temp_load(s, ts, i_required_regs, i_allocated_regs, 5339 i_preferred_regs); 5340 reg = ts->reg; 5341 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5342 } 5343 if (allocate_new_reg) { 5344 /* 5345 * Allocate a new register matching the constraint 5346 * and move the temporary register into it. 5347 */ 5348 temp_load(s, ts, tcg_target_available_regs[ts->type], 5349 i_allocated_regs, 0); 5350 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5351 i_preferred_regs, ts->indirect_base); 5352 copyto_new_reg = true; 5353 } 5354 break; 5355 5356 case 1: 5357 /* First of an input pair; if i1 == i2, the second is an output. */ 5358 i1 = i; 5359 i2 = arg_ct->pair_index; 5360 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5361 5362 /* 5363 * It is easier to default to allocating a new pair 5364 * and to identify a few cases where it's not required. 5365 */ 5366 if (arg_ct->ialias) { 5367 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5368 if (IS_DEAD_ARG(i1) && 5369 IS_DEAD_ARG(i2) && 5370 !temp_readonly(ts) && 5371 ts->val_type == TEMP_VAL_REG && 5372 ts->reg < TCG_TARGET_NB_REGS - 1 && 5373 tcg_regset_test_reg(i_required_regs, reg) && 5374 !tcg_regset_test_reg(i_allocated_regs, reg) && 5375 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5376 (ts2 5377 ? ts2->val_type == TEMP_VAL_REG && 5378 ts2->reg == reg + 1 && 5379 !temp_readonly(ts2) 5380 : s->reg_to_temp[reg + 1] == NULL)) { 5381 break; 5382 } 5383 } else { 5384 /* Without aliasing, the pair must also be an input. */ 5385 tcg_debug_assert(ts2); 5386 if (ts->val_type == TEMP_VAL_REG && 5387 ts2->val_type == TEMP_VAL_REG && 5388 ts2->reg == reg + 1 && 5389 tcg_regset_test_reg(i_required_regs, reg)) { 5390 break; 5391 } 5392 } 5393 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5394 0, ts->indirect_base); 5395 goto do_pair; 5396 5397 case 2: /* pair second */ 5398 reg = new_args[arg_ct->pair_index] + 1; 5399 goto do_pair; 5400 5401 case 3: /* ialias with second output, no first input */ 5402 tcg_debug_assert(arg_ct->ialias); 5403 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5404 5405 if (IS_DEAD_ARG(i) && 5406 !temp_readonly(ts) && 5407 ts->val_type == TEMP_VAL_REG && 5408 reg > 0 && 5409 s->reg_to_temp[reg - 1] == NULL && 5410 tcg_regset_test_reg(i_required_regs, reg) && 5411 !tcg_regset_test_reg(i_allocated_regs, reg) && 5412 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5413 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5414 break; 5415 } 5416 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5417 i_allocated_regs, 0, 5418 ts->indirect_base); 5419 tcg_regset_set_reg(i_allocated_regs, reg); 5420 reg += 1; 5421 goto do_pair; 5422 5423 do_pair: 5424 /* 5425 * If an aliased input is not dead after the instruction, 5426 * we must allocate a new register and move it. 5427 */ 5428 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5429 TCGRegSet t_allocated_regs = i_allocated_regs; 5430 5431 /* 5432 * Because of the alias, and the continued life, make sure 5433 * that the temp is somewhere *other* than the reg pair, 5434 * and we get a copy in reg. 5435 */ 5436 tcg_regset_set_reg(t_allocated_regs, reg); 5437 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5438 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5439 /* If ts was already in reg, copy it somewhere else. 
*/ 5440 TCGReg nr; 5441 bool ok; 5442 5443 tcg_debug_assert(ts->kind != TEMP_FIXED); 5444 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5445 t_allocated_regs, 0, ts->indirect_base); 5446 ok = tcg_out_mov(s, ts->type, nr, reg); 5447 tcg_debug_assert(ok); 5448 5449 set_temp_val_reg(s, ts, nr); 5450 } else { 5451 temp_load(s, ts, tcg_target_available_regs[ts->type], 5452 t_allocated_regs, 0); 5453 copyto_new_reg = true; 5454 } 5455 } else { 5456 /* Preferably allocate to reg, otherwise copy. */ 5457 i_required_regs = (TCGRegSet)1 << reg; 5458 temp_load(s, ts, i_required_regs, i_allocated_regs, 5459 i_preferred_regs); 5460 copyto_new_reg = ts->reg != reg; 5461 } 5462 break; 5463 5464 default: 5465 g_assert_not_reached(); 5466 } 5467 5468 if (copyto_new_reg) { 5469 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5470 /* 5471 * Cross register class move not supported. Sync the 5472 * temp back to its slot and load from there. 5473 */ 5474 temp_sync(s, ts, i_allocated_regs, 0, 0); 5475 tcg_out_ld(s, ts->type, reg, 5476 ts->mem_base->reg, ts->mem_offset); 5477 } 5478 } 5479 new_args[i] = reg; 5480 const_args[i] = 0; 5481 tcg_regset_set_reg(i_allocated_regs, reg); 5482 } 5483 5484 /* mark dead temporaries and free the associated registers */ 5485 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5486 if (IS_DEAD_ARG(i)) { 5487 temp_dead(s, arg_temp(op->args[i])); 5488 } 5489 } 5490 5491 if (def->flags & TCG_OPF_COND_BRANCH) { 5492 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5493 } else if (def->flags & TCG_OPF_BB_END) { 5494 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5495 } else { 5496 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5497 assert_carry_dead(s); 5498 /* XXX: permit generic clobber register list ? */ 5499 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5500 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5501 tcg_reg_free(s, i, i_allocated_regs); 5502 } 5503 } 5504 } 5505 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5506 /* sync globals if the op has side effects and might trigger 5507 an exception. */ 5508 sync_globals(s, i_allocated_regs); 5509 } 5510 5511 /* satisfy the output constraints */ 5512 for (k = 0; k < nb_oargs; k++) { 5513 i = args_ct[k].sort_index; 5514 arg = op->args[i]; 5515 arg_ct = &args_ct[i]; 5516 ts = arg_temp(arg); 5517 5518 /* ENV should not be modified. 
*/ 5519 tcg_debug_assert(!temp_readonly(ts)); 5520 5521 switch (arg_ct->pair) { 5522 case 0: /* not paired */ 5523 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5524 reg = new_args[arg_ct->alias_index]; 5525 } else if (arg_ct->newreg) { 5526 reg = tcg_reg_alloc(s, arg_ct->regs, 5527 i_allocated_regs | o_allocated_regs, 5528 output_pref(op, k), ts->indirect_base); 5529 } else { 5530 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5531 output_pref(op, k), ts->indirect_base); 5532 } 5533 break; 5534 5535 case 1: /* first of pair */ 5536 if (arg_ct->oalias) { 5537 reg = new_args[arg_ct->alias_index]; 5538 } else if (arg_ct->newreg) { 5539 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5540 i_allocated_regs | o_allocated_regs, 5541 output_pref(op, k), 5542 ts->indirect_base); 5543 } else { 5544 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5545 output_pref(op, k), 5546 ts->indirect_base); 5547 } 5548 break; 5549 5550 case 2: /* second of pair */ 5551 if (arg_ct->oalias) { 5552 reg = new_args[arg_ct->alias_index]; 5553 } else { 5554 reg = new_args[arg_ct->pair_index] + 1; 5555 } 5556 break; 5557 5558 case 3: /* first of pair, aliasing with a second input */ 5559 tcg_debug_assert(!arg_ct->newreg); 5560 reg = new_args[arg_ct->pair_index] - 1; 5561 break; 5562 5563 default: 5564 g_assert_not_reached(); 5565 } 5566 tcg_regset_set_reg(o_allocated_regs, reg); 5567 set_temp_val_reg(s, ts, reg); 5568 ts->mem_coherent = 0; 5569 new_args[i] = reg; 5570 } 5571 } 5572 5573 /* emit instruction */ 5574 TCGType type = TCGOP_TYPE(op); 5575 switch (op->opc) { 5576 case INDEX_op_addc1o: 5577 tcg_out_set_carry(s); 5578 /* fall through */ 5579 case INDEX_op_add: 5580 case INDEX_op_addcio: 5581 case INDEX_op_addco: 5582 case INDEX_op_and: 5583 case INDEX_op_andc: 5584 case INDEX_op_clz: 5585 case INDEX_op_ctz: 5586 case INDEX_op_divs: 5587 case INDEX_op_divu: 5588 case INDEX_op_eqv: 5589 case INDEX_op_mul: 5590 case INDEX_op_mulsh: 5591 case INDEX_op_muluh: 5592 case INDEX_op_nand: 5593 case INDEX_op_nor: 5594 case INDEX_op_or: 5595 case INDEX_op_orc: 5596 case INDEX_op_rems: 5597 case INDEX_op_remu: 5598 case INDEX_op_rotl: 5599 case INDEX_op_rotr: 5600 case INDEX_op_sar: 5601 case INDEX_op_shl: 5602 case INDEX_op_shr: 5603 case INDEX_op_xor: 5604 { 5605 const TCGOutOpBinary *out = 5606 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5607 5608 /* Constants should never appear in the first source operand. */ 5609 tcg_debug_assert(!const_args[1]); 5610 if (const_args[2]) { 5611 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5612 } else { 5613 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5614 } 5615 } 5616 break; 5617 5618 case INDEX_op_sub: 5619 { 5620 const TCGOutOpSubtract *out = &outop_sub; 5621 5622 /* 5623 * Constants should never appear in the second source operand. 5624 * These are folded to add with negative constant. 
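             *
             * E.g. "sub t0, t1, 5" reaches here as "add t0, t1, -5",
             * leaving only the reg - reg form and the const - reg
             * form (e.g. 0 - t1) to be emitted.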
5625 */ 5626 tcg_debug_assert(!const_args[2]); 5627 if (const_args[1]) { 5628 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5629 } else { 5630 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5631 } 5632 } 5633 break; 5634 5635 case INDEX_op_subb1o: 5636 tcg_out_set_borrow(s); 5637 /* fall through */ 5638 case INDEX_op_addci: 5639 case INDEX_op_subbi: 5640 case INDEX_op_subbio: 5641 case INDEX_op_subbo: 5642 { 5643 const TCGOutOpAddSubCarry *out = 5644 container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); 5645 5646 if (const_args[2]) { 5647 if (const_args[1]) { 5648 out->out_rii(s, type, new_args[0], 5649 new_args[1], new_args[2]); 5650 } else { 5651 out->out_rri(s, type, new_args[0], 5652 new_args[1], new_args[2]); 5653 } 5654 } else if (const_args[1]) { 5655 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5656 } else { 5657 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5658 } 5659 } 5660 break; 5661 5662 case INDEX_op_bswap64: 5663 case INDEX_op_ext_i32_i64: 5664 case INDEX_op_extu_i32_i64: 5665 case INDEX_op_extrl_i64_i32: 5666 case INDEX_op_extrh_i64_i32: 5667 assert(TCG_TARGET_REG_BITS == 64); 5668 /* fall through */ 5669 case INDEX_op_ctpop: 5670 case INDEX_op_neg: 5671 case INDEX_op_not: 5672 { 5673 const TCGOutOpUnary *out = 5674 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5675 5676 /* Constants should have been folded. */ 5677 tcg_debug_assert(!const_args[1]); 5678 out->out_rr(s, type, new_args[0], new_args[1]); 5679 } 5680 break; 5681 5682 case INDEX_op_bswap16: 5683 case INDEX_op_bswap32: 5684 { 5685 const TCGOutOpBswap *out = 5686 container_of(all_outop[op->opc], TCGOutOpBswap, base); 5687 5688 tcg_debug_assert(!const_args[1]); 5689 out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); 5690 } 5691 break; 5692 5693 case INDEX_op_deposit: 5694 { 5695 const TCGOutOpDeposit *out = &outop_deposit; 5696 5697 if (const_args[2]) { 5698 tcg_debug_assert(!const_args[1]); 5699 out->out_rri(s, type, new_args[0], new_args[1], 5700 new_args[2], new_args[3], new_args[4]); 5701 } else if (const_args[1]) { 5702 tcg_debug_assert(new_args[1] == 0); 5703 tcg_debug_assert(!const_args[2]); 5704 out->out_rzr(s, type, new_args[0], new_args[2], 5705 new_args[3], new_args[4]); 5706 } else { 5707 out->out_rrr(s, type, new_args[0], new_args[1], 5708 new_args[2], new_args[3], new_args[4]); 5709 } 5710 } 5711 break; 5712 5713 case INDEX_op_divs2: 5714 case INDEX_op_divu2: 5715 { 5716 const TCGOutOpDivRem *out = 5717 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5718 5719 /* Only used by x86 and s390x, which use matching constraints. 
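               Both produce quotient and remainder in an implicit
               register pair (e.g. EDX:EAX on x86, an even/odd pair
               on s390x), which the matching constraints checked
               below express.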
*/ 5720 tcg_debug_assert(new_args[0] == new_args[2]); 5721 tcg_debug_assert(new_args[1] == new_args[3]); 5722 tcg_debug_assert(!const_args[4]); 5723 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5724 } 5725 break; 5726 5727 case INDEX_op_extract: 5728 case INDEX_op_sextract: 5729 { 5730 const TCGOutOpExtract *out = 5731 container_of(all_outop[op->opc], TCGOutOpExtract, base); 5732 5733 tcg_debug_assert(!const_args[1]); 5734 out->out_rr(s, type, new_args[0], new_args[1], 5735 new_args[2], new_args[3]); 5736 } 5737 break; 5738 5739 case INDEX_op_extract2: 5740 { 5741 const TCGOutOpExtract2 *out = &outop_extract2; 5742 5743 tcg_debug_assert(!const_args[1]); 5744 tcg_debug_assert(!const_args[2]); 5745 out->out_rrr(s, type, new_args[0], new_args[1], 5746 new_args[2], new_args[3]); 5747 } 5748 break; 5749 5750 case INDEX_op_ld8u: 5751 case INDEX_op_ld8s: 5752 case INDEX_op_ld16u: 5753 case INDEX_op_ld16s: 5754 case INDEX_op_ld32u: 5755 case INDEX_op_ld32s: 5756 case INDEX_op_ld: 5757 { 5758 const TCGOutOpLoad *out = 5759 container_of(all_outop[op->opc], TCGOutOpLoad, base); 5760 5761 tcg_debug_assert(!const_args[1]); 5762 out->out(s, type, new_args[0], new_args[1], new_args[2]); 5763 } 5764 break; 5765 5766 case INDEX_op_muls2: 5767 case INDEX_op_mulu2: 5768 { 5769 const TCGOutOpMul2 *out = 5770 container_of(all_outop[op->opc], TCGOutOpMul2, base); 5771 5772 tcg_debug_assert(!const_args[2]); 5773 tcg_debug_assert(!const_args[3]); 5774 out->out_rrrr(s, type, new_args[0], new_args[1], 5775 new_args[2], new_args[3]); 5776 } 5777 break; 5778 5779 case INDEX_op_st32: 5780 /* Use tcg_op_st w/ I32. */ 5781 type = TCG_TYPE_I32; 5782 /* fall through */ 5783 case INDEX_op_st: 5784 case INDEX_op_st8: 5785 case INDEX_op_st16: 5786 { 5787 const TCGOutOpStore *out = 5788 container_of(all_outop[op->opc], TCGOutOpStore, base); 5789 5790 if (const_args[0]) { 5791 out->out_i(s, type, new_args[0], new_args[1], new_args[2]); 5792 } else { 5793 out->out_r(s, type, new_args[0], new_args[1], new_args[2]); 5794 } 5795 } 5796 break; 5797 5798 case INDEX_op_brcond: 5799 { 5800 const TCGOutOpBrcond *out = &outop_brcond; 5801 TCGCond cond = new_args[2]; 5802 TCGLabel *label = arg_label(new_args[3]); 5803 5804 tcg_debug_assert(!const_args[0]); 5805 if (const_args[1]) { 5806 out->out_ri(s, type, cond, new_args[0], new_args[1], label); 5807 } else { 5808 out->out_rr(s, type, cond, new_args[0], new_args[1], label); 5809 } 5810 } 5811 break; 5812 5813 case INDEX_op_movcond: 5814 { 5815 const TCGOutOpMovcond *out = &outop_movcond; 5816 TCGCond cond = new_args[5]; 5817 5818 tcg_debug_assert(!const_args[1]); 5819 out->out(s, type, cond, new_args[0], 5820 new_args[1], new_args[2], const_args[2], 5821 new_args[3], const_args[3], 5822 new_args[4], const_args[4]); 5823 } 5824 break; 5825 5826 case INDEX_op_setcond: 5827 case INDEX_op_negsetcond: 5828 { 5829 const TCGOutOpSetcond *out = 5830 container_of(all_outop[op->opc], TCGOutOpSetcond, base); 5831 TCGCond cond = new_args[3]; 5832 5833 tcg_debug_assert(!const_args[1]); 5834 if (const_args[2]) { 5835 out->out_rri(s, type, cond, 5836 new_args[0], new_args[1], new_args[2]); 5837 } else { 5838 out->out_rrr(s, type, cond, 5839 new_args[0], new_args[1], new_args[2]); 5840 } 5841 } 5842 break; 5843 5844 #if TCG_TARGET_REG_BITS == 32 5845 case INDEX_op_brcond2_i32: 5846 { 5847 const TCGOutOpBrcond2 *out = &outop_brcond2; 5848 TCGCond cond = new_args[4]; 5849 TCGLabel *label = arg_label(new_args[5]); 5850 5851 tcg_debug_assert(!const_args[0]); 5852 
tcg_debug_assert(!const_args[1]); 5853 out->out(s, cond, new_args[0], new_args[1], 5854 new_args[2], const_args[2], 5855 new_args[3], const_args[3], label); 5856 } 5857 break; 5858 case INDEX_op_setcond2_i32: 5859 { 5860 const TCGOutOpSetcond2 *out = &outop_setcond2; 5861 TCGCond cond = new_args[5]; 5862 5863 tcg_debug_assert(!const_args[1]); 5864 tcg_debug_assert(!const_args[2]); 5865 out->out(s, cond, new_args[0], new_args[1], new_args[2], 5866 new_args[3], const_args[3], new_args[4], const_args[4]); 5867 } 5868 break; 5869 #else 5870 case INDEX_op_brcond2_i32: 5871 case INDEX_op_setcond2_i32: 5872 g_assert_not_reached(); 5873 #endif 5874 5875 case INDEX_op_goto_ptr: 5876 tcg_debug_assert(!const_args[0]); 5877 tcg_out_goto_ptr(s, new_args[0]); 5878 break; 5879 5880 default: 5881 if (def->flags & TCG_OPF_VECTOR) { 5882 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5883 TCGOP_VECE(op), new_args, const_args); 5884 } else { 5885 tcg_out_op(s, op->opc, type, new_args, const_args); 5886 } 5887 break; 5888 } 5889 5890 if (def->flags & TCG_OPF_CARRY_IN) { 5891 s->carry_live = false; 5892 } 5893 if (def->flags & TCG_OPF_CARRY_OUT) { 5894 s->carry_live = true; 5895 } 5896 5897 /* move the outputs in the correct register if needed */ 5898 for(i = 0; i < nb_oargs; i++) { 5899 ts = arg_temp(op->args[i]); 5900 5901 /* ENV should not be modified. */ 5902 tcg_debug_assert(!temp_readonly(ts)); 5903 5904 if (NEED_SYNC_ARG(i)) { 5905 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5906 } else if (IS_DEAD_ARG(i)) { 5907 temp_dead(s, ts); 5908 } 5909 } 5910 } 5911 5912 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5913 { 5914 const TCGLifeData arg_life = op->life; 5915 TCGTemp *ots, *itsl, *itsh; 5916 TCGType vtype = TCGOP_TYPE(op); 5917 5918 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5919 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5920 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5921 5922 ots = arg_temp(op->args[0]); 5923 itsl = arg_temp(op->args[1]); 5924 itsh = arg_temp(op->args[2]); 5925 5926 /* ENV should not be modified. */ 5927 tcg_debug_assert(!temp_readonly(ots)); 5928 5929 /* Allocate the output register now. */ 5930 if (ots->val_type != TEMP_VAL_REG) { 5931 TCGRegSet allocated_regs = s->reserved_regs; 5932 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5933 TCGReg oreg; 5934 5935 /* Make sure to not spill the input registers. */ 5936 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5937 tcg_regset_set_reg(allocated_regs, itsl->reg); 5938 } 5939 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5940 tcg_regset_set_reg(allocated_regs, itsh->reg); 5941 } 5942 5943 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5944 output_pref(op, 0), ots->indirect_base); 5945 set_temp_val_reg(s, ots, oreg); 5946 } 5947 5948 /* Promote dup2 of immediates to dupi_vec. */ 5949 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5950 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5951 MemOp vece = MO_64; 5952 5953 if (val == dup_const(MO_8, val)) { 5954 vece = MO_8; 5955 } else if (val == dup_const(MO_16, val)) { 5956 vece = MO_16; 5957 } else if (val == dup_const(MO_32, val)) { 5958 vece = MO_32; 5959 } 5960 5961 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5962 goto done; 5963 } 5964 5965 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5966 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5967 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5968 itsl == itsh + (HOST_BIG_ENDIAN ? 
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported. Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               arg_slot_stk_ofs(arg_slot));
}

static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (arg_slot_reg_p(l->arg_slot)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
    }
}

static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;

    if (arg_slot_reg_p(arg_slot)) {
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   arg_slot_stk_ofs(arg_slot));
    }
}

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
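     *
     * (A sketch of the effect, with assumed numbers: given 4 argument
     * registers and 6 word-sized inputs, in[5] and in[4] are stored to
     * their stack slots first, so the temps they load may use any free
     * register without disturbing the argument registers that in[0..3]
     * will occupy.)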
6080 */ 6081 for (i = nb_iargs - 1; i >= 0; --i) { 6082 const TCGCallArgumentLoc *loc = &info->in[i]; 6083 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 6084 6085 switch (loc->kind) { 6086 case TCG_CALL_ARG_NORMAL: 6087 case TCG_CALL_ARG_EXTEND_U: 6088 case TCG_CALL_ARG_EXTEND_S: 6089 load_arg_normal(s, loc, ts, &allocated_regs); 6090 break; 6091 case TCG_CALL_ARG_BY_REF: 6092 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6093 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 6094 arg_slot_stk_ofs(loc->ref_slot), 6095 &allocated_regs); 6096 break; 6097 case TCG_CALL_ARG_BY_REF_N: 6098 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6099 break; 6100 default: 6101 g_assert_not_reached(); 6102 } 6103 } 6104 6105 /* Mark dead temporaries and free the associated registers. */ 6106 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 6107 if (IS_DEAD_ARG(i)) { 6108 temp_dead(s, arg_temp(op->args[i])); 6109 } 6110 } 6111 6112 /* Clobber call registers. */ 6113 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 6114 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 6115 tcg_reg_free(s, i, allocated_regs); 6116 } 6117 } 6118 6119 /* 6120 * Save globals if they might be written by the helper, 6121 * sync them if they might be read. 6122 */ 6123 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 6124 /* Nothing to do */ 6125 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 6126 sync_globals(s, allocated_regs); 6127 } else { 6128 save_globals(s, allocated_regs); 6129 } 6130 6131 /* 6132 * If the ABI passes a pointer to the returned struct as the first 6133 * argument, load that now. Pass a pointer to the output home slot. 6134 */ 6135 if (info->out_kind == TCG_CALL_RET_BY_REF) { 6136 TCGTemp *ts = arg_temp(op->args[0]); 6137 6138 if (!ts->mem_allocated) { 6139 temp_allocate_frame(s, ts); 6140 } 6141 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 6142 } 6143 6144 tcg_out_call(s, tcg_call_func(op), info); 6145 6146 /* Assign output registers and emit moves if needed. */ 6147 switch (info->out_kind) { 6148 case TCG_CALL_RET_NORMAL: 6149 for (i = 0; i < nb_oargs; i++) { 6150 TCGTemp *ts = arg_temp(op->args[i]); 6151 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 6152 6153 /* ENV should not be modified. */ 6154 tcg_debug_assert(!temp_readonly(ts)); 6155 6156 set_temp_val_reg(s, ts, reg); 6157 ts->mem_coherent = 0; 6158 } 6159 break; 6160 6161 case TCG_CALL_RET_BY_VEC: 6162 { 6163 TCGTemp *ts = arg_temp(op->args[0]); 6164 6165 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 6166 tcg_debug_assert(ts->temp_subindex == 0); 6167 if (!ts->mem_allocated) { 6168 temp_allocate_frame(s, ts); 6169 } 6170 tcg_out_st(s, TCG_TYPE_V128, 6171 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6172 ts->mem_base->reg, ts->mem_offset); 6173 } 6174 /* fall through to mark all parts in memory */ 6175 6176 case TCG_CALL_RET_BY_REF: 6177 /* The callee has performed a write through the reference. */ 6178 for (i = 0; i < nb_oargs; i++) { 6179 TCGTemp *ts = arg_temp(op->args[i]); 6180 ts->val_type = TEMP_VAL_MEM; 6181 } 6182 break; 6183 6184 default: 6185 g_assert_not_reached(); 6186 } 6187 6188 /* Flush or discard output registers as needed. 
     */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

/**
 * atom_and_align_for_opc:
 * @s: tcg context
 * @opc: memory operation code
 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
 * @allow_two_ops: true if we are prepared to issue two operations
 *
 * Return the alignment and atomicity to use for the inline fast path
 * for the given memory operation.  The alignment may be larger than
 * that specified in @opc, and the correct alignment will be diagnosed
 * by the slow path helper.
 *
 * If @allow_two_ops, the host is prepared to test for 2x alignment,
 * and issue two loads or stores for subalignment.
 */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
{
    MemOp align = memop_alignment_bits(opc);
    MemOp size = opc & MO_SIZE;
    MemOp half = size ? size - 1 : 0;
    MemOp atom = opc & MO_ATOM_MASK;
    MemOp atmax;

    switch (atom) {
    case MO_ATOM_NONE:
        /* The operation requires no specific atomicity. */
        atmax = MO_8;
        break;

    case MO_ATOM_IFALIGN:
        atmax = size;
        break;

    case MO_ATOM_IFALIGN_PAIR:
        atmax = half;
        break;

    case MO_ATOM_WITHIN16:
        atmax = size;
        if (size == MO_128) {
            /* Misalignment implies !within16, and therefore no atomicity. */
        } else if (host_atom != MO_ATOM_WITHIN16) {
            /* The host does not implement within16, so require alignment. */
            align = MAX(align, size);
        }
        break;

    case MO_ATOM_WITHIN16_PAIR:
        atmax = size;
        /*
         * Misalignment implies !within16, and therefore half atomicity.
         * Any host prepared for two operations can implement this with
         * half alignment.
         */
        if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
            align = MAX(align, half);
        }
        break;

    case MO_ATOM_SUBALIGN:
        atmax = size;
        if (host_atom != MO_ATOM_SUBALIGN) {
            /* If unaligned but not odd, there are subobjects up to half. */
            if (allow_two_ops) {
                align = MAX(align, half);
            } else {
                align = MAX(align, size);
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    return (TCGAtomAlign){ .atom = atmax, .align = align };
}

/*
 * Similarly for qemu_ld/st slow path helpers.
 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
 * using only the provided backend tcg_out_* functions.
 */

static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
{
    int ofs = arg_slot_stk_ofs(slot);

    /*
     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
     * require extension to uint64_t, adjust the address for uint32_t.
     */
    if (HOST_BIG_ENDIAN &&
        TCG_TARGET_REG_BITS == 64 &&
        type == TCG_TYPE_I32) {
        ofs += 4;
    }
    return ofs;
}

static void tcg_out_helper_load_slots(TCGContext *s,
                                      unsigned nmov, TCGMovExtend *mov,
                                      const TCGLdstHelperParam *parm)
{
    unsigned i;
    TCGReg dst3;

    /*
     * Start from the end, storing to the stack first.
     * This frees those registers, so we need not consider overlap.
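     * (Example under assumed numbers: with 4 argument registers and a
     * 6-slot argument list, slots 5 and 4 are stored to the stack
     * first; the register-to-register moves for slots 0..3 are then
     * resolved below, where only they can conflict with one another.)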
6310 */ 6311 for (i = nmov; i-- > 0; ) { 6312 unsigned slot = mov[i].dst; 6313 6314 if (arg_slot_reg_p(slot)) { 6315 goto found_reg; 6316 } 6317 6318 TCGReg src = mov[i].src; 6319 TCGType dst_type = mov[i].dst_type; 6320 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6321 6322 /* The argument is going onto the stack; extend into scratch. */ 6323 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 6324 tcg_debug_assert(parm->ntmp != 0); 6325 mov[i].dst = src = parm->tmp[0]; 6326 tcg_out_movext1(s, &mov[i]); 6327 } 6328 6329 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 6330 tcg_out_helper_stk_ofs(dst_type, slot)); 6331 } 6332 return; 6333 6334 found_reg: 6335 /* 6336 * The remaining arguments are in registers. 6337 * Convert slot numbers to argument registers. 6338 */ 6339 nmov = i + 1; 6340 for (i = 0; i < nmov; ++i) { 6341 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 6342 } 6343 6344 switch (nmov) { 6345 case 4: 6346 /* The backend must have provided enough temps for the worst case. */ 6347 tcg_debug_assert(parm->ntmp >= 2); 6348 6349 dst3 = mov[3].dst; 6350 for (unsigned j = 0; j < 3; ++j) { 6351 if (dst3 == mov[j].src) { 6352 /* 6353 * Conflict. Copy the source to a temporary, perform the 6354 * remaining moves, then the extension from our scratch 6355 * on the way out. 6356 */ 6357 TCGReg scratch = parm->tmp[1]; 6358 6359 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 6360 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 6361 tcg_out_movext1_new_src(s, &mov[3], scratch); 6362 break; 6363 } 6364 } 6365 6366 /* No conflicts: perform this move and continue. */ 6367 tcg_out_movext1(s, &mov[3]); 6368 /* fall through */ 6369 6370 case 3: 6371 tcg_out_movext3(s, mov, mov + 1, mov + 2, 6372 parm->ntmp ? parm->tmp[0] : -1); 6373 break; 6374 case 2: 6375 tcg_out_movext2(s, mov, mov + 1, 6376 parm->ntmp ? parm->tmp[0] : -1); 6377 break; 6378 case 1: 6379 tcg_out_movext1(s, mov); 6380 break; 6381 default: 6382 g_assert_not_reached(); 6383 } 6384 } 6385 6386 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 6387 TCGType type, tcg_target_long imm, 6388 const TCGLdstHelperParam *parm) 6389 { 6390 if (arg_slot_reg_p(slot)) { 6391 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 6392 } else { 6393 int ofs = tcg_out_helper_stk_ofs(type, slot); 6394 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 6395 tcg_debug_assert(parm->ntmp != 0); 6396 tcg_out_movi(s, type, parm->tmp[0], imm); 6397 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 6398 } 6399 } 6400 } 6401 6402 static void tcg_out_helper_load_common_args(TCGContext *s, 6403 const TCGLabelQemuLdst *ldst, 6404 const TCGLdstHelperParam *parm, 6405 const TCGHelperInfo *info, 6406 unsigned next_arg) 6407 { 6408 TCGMovExtend ptr_mov = { 6409 .dst_type = TCG_TYPE_PTR, 6410 .src_type = TCG_TYPE_PTR, 6411 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6412 }; 6413 const TCGCallArgumentLoc *loc = &info->in[0]; 6414 TCGType type; 6415 unsigned slot; 6416 tcg_target_ulong imm; 6417 6418 /* 6419 * Handle env, which is always first. 6420 */ 6421 ptr_mov.dst = loc->arg_slot; 6422 ptr_mov.src = TCG_AREG0; 6423 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6424 6425 /* 6426 * Handle oi. 6427 */ 6428 imm = ldst->oi; 6429 loc = &info->in[next_arg]; 6430 type = TCG_TYPE_I32; 6431 switch (loc->kind) { 6432 case TCG_CALL_ARG_NORMAL: 6433 break; 6434 case TCG_CALL_ARG_EXTEND_U: 6435 case TCG_CALL_ARG_EXTEND_S: 6436 /* No extension required for MemOpIdx. 
         */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}

static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
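         *
         * (E.g. on a little-endian host, assumed here for concreteness:
         * the 64-bit address argument spans two slots; addr_reg fills
         * the low slot and a constant zero the high one, which is why
         * next_arg advances by 2.)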
6560 */ 6561 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6562 TCG_TYPE_I32, TCG_TYPE_I32, 6563 ldst->addr_reg, -1); 6564 tcg_out_helper_load_slots(s, 1, mov, parm); 6565 6566 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6567 TCG_TYPE_I32, 0, parm); 6568 next_arg += 2; 6569 } else { 6570 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6571 ldst->addr_reg, -1); 6572 tcg_out_helper_load_slots(s, nmov, mov, parm); 6573 next_arg += nmov; 6574 } 6575 6576 switch (info->out_kind) { 6577 case TCG_CALL_RET_NORMAL: 6578 case TCG_CALL_RET_BY_VEC: 6579 break; 6580 case TCG_CALL_RET_BY_REF: 6581 /* 6582 * The return reference is in the first argument slot. 6583 * We need memory in which to return: re-use the top of stack. 6584 */ 6585 { 6586 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6587 6588 if (arg_slot_reg_p(0)) { 6589 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6590 TCG_REG_CALL_STACK, ofs_slot0); 6591 } else { 6592 tcg_debug_assert(parm->ntmp != 0); 6593 tcg_out_addi_ptr(s, parm->tmp[0], 6594 TCG_REG_CALL_STACK, ofs_slot0); 6595 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6596 TCG_REG_CALL_STACK, ofs_slot0); 6597 } 6598 } 6599 break; 6600 default: 6601 g_assert_not_reached(); 6602 } 6603 6604 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6605 } 6606 6607 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6608 bool load_sign, 6609 const TCGLdstHelperParam *parm) 6610 { 6611 MemOp mop = get_memop(ldst->oi); 6612 TCGMovExtend mov[2]; 6613 int ofs_slot0; 6614 6615 switch (ldst->type) { 6616 case TCG_TYPE_I64: 6617 if (TCG_TARGET_REG_BITS == 32) { 6618 break; 6619 } 6620 /* fall through */ 6621 6622 case TCG_TYPE_I32: 6623 mov[0].dst = ldst->datalo_reg; 6624 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6625 mov[0].dst_type = ldst->type; 6626 mov[0].src_type = TCG_TYPE_REG; 6627 6628 /* 6629 * If load_sign, then we allowed the helper to perform the 6630 * appropriate sign extension to tcg_target_ulong, and all 6631 * we need now is a plain move. 6632 * 6633 * If they do not, then we expect the relevant extension 6634 * instruction to be no more expensive than a move, and 6635 * we thus save the icache etc by only using one of two 6636 * helper functions. 
6637 */ 6638 if (load_sign || !(mop & MO_SIGN)) { 6639 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6640 mov[0].src_ext = MO_32; 6641 } else { 6642 mov[0].src_ext = MO_64; 6643 } 6644 } else { 6645 mov[0].src_ext = mop & MO_SSIZE; 6646 } 6647 tcg_out_movext1(s, mov); 6648 return; 6649 6650 case TCG_TYPE_I128: 6651 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6652 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6653 switch (TCG_TARGET_CALL_RET_I128) { 6654 case TCG_CALL_RET_NORMAL: 6655 break; 6656 case TCG_CALL_RET_BY_VEC: 6657 tcg_out_st(s, TCG_TYPE_V128, 6658 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6659 TCG_REG_CALL_STACK, ofs_slot0); 6660 /* fall through */ 6661 case TCG_CALL_RET_BY_REF: 6662 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6663 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6664 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6665 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6666 return; 6667 default: 6668 g_assert_not_reached(); 6669 } 6670 break; 6671 6672 default: 6673 g_assert_not_reached(); 6674 } 6675 6676 mov[0].dst = ldst->datalo_reg; 6677 mov[0].src = 6678 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6679 mov[0].dst_type = TCG_TYPE_REG; 6680 mov[0].src_type = TCG_TYPE_REG; 6681 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6682 6683 mov[1].dst = ldst->datahi_reg; 6684 mov[1].src = 6685 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6686 mov[1].dst_type = TCG_TYPE_REG; 6687 mov[1].src_type = TCG_TYPE_REG; 6688 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6689 6690 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1); 6691 } 6692 6693 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6694 const TCGLdstHelperParam *parm) 6695 { 6696 const TCGHelperInfo *info; 6697 const TCGCallArgumentLoc *loc; 6698 TCGMovExtend mov[4]; 6699 TCGType data_type; 6700 unsigned next_arg, nmov, n; 6701 MemOp mop = get_memop(ldst->oi); 6702 6703 switch (mop & MO_SIZE) { 6704 case MO_8: 6705 case MO_16: 6706 case MO_32: 6707 info = &info_helper_st32_mmu; 6708 data_type = TCG_TYPE_I32; 6709 break; 6710 case MO_64: 6711 info = &info_helper_st64_mmu; 6712 data_type = TCG_TYPE_I64; 6713 break; 6714 case MO_128: 6715 info = &info_helper_st128_mmu; 6716 data_type = TCG_TYPE_I128; 6717 break; 6718 default: 6719 g_assert_not_reached(); 6720 } 6721 6722 /* Defer env argument. */ 6723 next_arg = 1; 6724 nmov = 0; 6725 6726 /* Handle addr argument. */ 6727 loc = &info->in[next_arg]; 6728 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6729 if (TCG_TARGET_REG_BITS == 32) { 6730 /* 6731 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6732 * to 64-bits for the helper by storing the low part. Later, 6733 * after we have processed the register inputs, we will load a 6734 * zero for the high part. 6735 */ 6736 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6737 TCG_TYPE_I32, TCG_TYPE_I32, 6738 ldst->addr_reg, -1); 6739 next_arg += 2; 6740 nmov += 1; 6741 } else { 6742 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6743 ldst->addr_reg, -1); 6744 next_arg += n; 6745 nmov += n; 6746 } 6747 6748 /* Handle data argument. 
     */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /*
             * If changes were made, re-run liveness.
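             * Pass 2 inserts loads and stores around the lowered
             * indirect temps, so the life data computed by the
             * first liveness pass is stale.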
             */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    s->carry_live = false;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_extrl_i64_i32:
            assert(TCG_TARGET_REG_BITS == 64);
            /*
             * If TCG_TYPE_I32 is represented in some canonical form,
             * e.g. zero or sign-extended, then emit as a unary op.
             * Otherwise we can treat this as a plain move.
             * If the output dies, treat this as a plain move, because
             * this will be implemented with a store.
             */
            if (TCG_TARGET_HAS_extr_i64_i32) {
                TCGLifeData arg_life = op->life;
                if (!IS_DEAD_ARG(0)) {
                    goto do_default;
                }
            }
            /* fall through */
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            assert_carry_dead(s);
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            assert_carry_dead(s);
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_br:
            tcg_out_br(s, arg_label(op->args[0]));
            break;
        case INDEX_op_mb:
            tcg_out_mb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
        do_default:
            /*
             * Sanity check that we've not introduced any unhandled opcodes.
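             * Everything that reaches the generic allocator must be
             * emittable by the backend for this type and flags.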
             */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /*
             * Note: in order to speed up the code, it would be much
             * faster to have specialized register allocator functions
             * for some common argument patterns.
             */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    assert_carry_dead(s);

    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block. */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush the instruction cache. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/*
 * In order to use this feature, the backend needs to do three things:
 *
 * (1) Define ELF_HOST_MACHINE to indicate both what value to
 *     put into the ELF image and to indicate support for the feature.
 *
 * (2) Define tcg_register_jit.  This should create a buffer containing
 *     the contents of a .debug_frame section that describes the post-
 *     prologue unwind info for the tcg machine.
 *
 * (3) Call tcg_register_jit_int, with the constructed .debug_frame.
 */

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/*
 * Must statically initialize the version, because GDB may check
 * the version before we can set it.
 */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/*
 * End GDB interface.
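 *
 * GDB sets a breakpoint on __jit_debug_register_code and, when it
 * fires, reads __jit_debug_descriptor to find the in-memory ELF
 * image describing the newly generated code.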
 */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /*
             * Trick: The contents of code_gen_buffer are not present in
             * this fake ELF file; that got allocated elsewhere.  Therefore
             * we mark .text as SHT_NOBITS (similar to .bss) so that readers
             * will not look for contents.  We can record any address.
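             * A debugger thus resolves symbols and unwind info against
             * the live code_gen_buffer mapping rather than bytes in
             * this image.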
             */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /*
     * Enable this block to be able to debug the ELF image file creation.
     * One can use readelf, objdump, or other inspection utilities.
     */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/*
 * No support for the feature.  Provide the entry point expected by
 * exec.c, and implement the internal function we declared earlier.
 */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif