1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 27 /* Define to dump the ELF file used to communicate with GDB. 
*/
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

/*
 * ELF_CLASS and ELF_DATA are defined before "elf.h" is included below.
 * The class follows the host pointer width and the data encoding follows
 * the host byte order.
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
/* Returns false on failure; callers in this file propagate that failure. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

/* The structure itself is defined further down in this file. */
typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.
*/
/*
 * NOTE(review): field names follow the DWARF .debug_frame Common
 * Information Entry layout -- confirm against the users of this struct.
 * The first member is aligned to the size of a host pointer.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/*
 * Packed header of a Frame Description Entry; func_start and func_len
 * are host-pointer sized.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

/*
 * One pending qemu_ld/qemu_st slow path.  Entries are queued on
 * TCGContext.ldst_labels and consumed by tcg_out_ldst_finalize().
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

/* Marked unused: not every configuration calls it. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc.
*/
static void tcg_out_tb_start(TCGContext *s);

/* Loads, register moves and immediate moves. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);

/* Sign/zero extensions and truncation; dispatched from tcg_out_movext(). */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);

static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
/*
 * Callers in this file (tcg_out_movext2/3) fall back to a scratch
 * register when this returns false.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);

/* Vector emitters: real prototypes here, stubs in the #else branch. */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
150 #else 151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 152 TCGReg dst, TCGReg src) 153 { 154 g_assert_not_reached(); 155 } 156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 157 TCGReg dst, TCGReg base, intptr_t offset) 158 { 159 g_assert_not_reached(); 160 } 161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 162 TCGReg dst, int64_t arg) 163 { 164 g_assert_not_reached(); 165 } 166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 167 unsigned vecl, unsigned vece, 168 const TCGArg args[TCG_MAX_OP_ARGS], 169 const int const_args[TCG_MAX_OP_ARGS]) 170 { 171 g_assert_not_reached(); 172 } 173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) 174 { 175 return 0; 176 } 177 #endif 178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 179 intptr_t arg2); 180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 181 TCGReg base, intptr_t ofs); 182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 183 const TCGHelperInfo *info); 184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); 185 static bool tcg_target_const_match(int64_t val, int ct, 186 TCGType type, TCGCond cond, int vece); 187 188 #ifndef CONFIG_USER_ONLY 189 #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) 190 #endif 191 192 typedef struct TCGLdstHelperParam { 193 TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg); 194 unsigned ntmp; 195 int tmp[3]; 196 } TCGLdstHelperParam; 197 198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 199 const TCGLdstHelperParam *p) 200 __attribute__((unused)); 201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l, 202 bool load_sign, const TCGLdstHelperParam *p) 203 __attribute__((unused)); 204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 205 const 
TCGLdstHelperParam *p) 206 __attribute__((unused)); 207 208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = { 209 [MO_UB] = helper_ldub_mmu, 210 [MO_SB] = helper_ldsb_mmu, 211 [MO_UW] = helper_lduw_mmu, 212 [MO_SW] = helper_ldsw_mmu, 213 [MO_UL] = helper_ldul_mmu, 214 [MO_UQ] = helper_ldq_mmu, 215 #if TCG_TARGET_REG_BITS == 64 216 [MO_SL] = helper_ldsl_mmu, 217 [MO_128] = helper_ld16_mmu, 218 #endif 219 }; 220 221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = { 222 [MO_8] = helper_stb_mmu, 223 [MO_16] = helper_stw_mmu, 224 [MO_32] = helper_stl_mmu, 225 [MO_64] = helper_stq_mmu, 226 #if TCG_TARGET_REG_BITS == 64 227 [MO_128] = helper_st16_mmu, 228 #endif 229 }; 230 231 typedef struct { 232 MemOp atom; /* lg2 bits of atomicity required */ 233 MemOp align; /* lg2 bits of alignment to use */ 234 } TCGAtomAlign; 235 236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 237 MemOp host_atom, bool allow_two_ops) 238 __attribute__((unused)); 239 240 #ifdef CONFIG_USER_ONLY 241 bool tcg_use_softmmu; 242 #endif 243 244 TCGContext tcg_init_ctx; 245 __thread TCGContext *tcg_ctx; 246 247 TCGContext **tcg_ctxs; 248 unsigned int tcg_cur_ctxs; 249 unsigned int tcg_max_ctxs; 250 TCGv_env tcg_env; 251 const void *tcg_code_gen_epilogue; 252 uintptr_t tcg_splitwx_diff; 253 254 #ifndef CONFIG_TCG_INTERPRETER 255 tcg_prologue_fn *tcg_qemu_tb_exec; 256 #endif 257 258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 259 static TCGRegSet tcg_target_call_clobber_regs; 260 261 #if TCG_TARGET_INSN_UNIT_SIZE == 1 262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 263 { 264 *s->code_ptr++ = v; 265 } 266 267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 268 uint8_t v) 269 { 270 *p = v; 271 } 272 #endif 273 274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 276 { 277 if 
(TCG_TARGET_INSN_UNIT_SIZE == 2) { 278 *s->code_ptr++ = v; 279 } else { 280 tcg_insn_unit *p = s->code_ptr; 281 memcpy(p, &v, sizeof(v)); 282 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 283 } 284 } 285 286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 287 uint16_t v) 288 { 289 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 290 *p = v; 291 } else { 292 memcpy(p, &v, sizeof(v)); 293 } 294 } 295 #endif 296 297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 299 { 300 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 301 *s->code_ptr++ = v; 302 } else { 303 tcg_insn_unit *p = s->code_ptr; 304 memcpy(p, &v, sizeof(v)); 305 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 306 } 307 } 308 309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 310 uint32_t v) 311 { 312 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 313 *p = v; 314 } else { 315 memcpy(p, &v, sizeof(v)); 316 } 317 } 318 #endif 319 320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 322 { 323 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 324 *s->code_ptr++ = v; 325 } else { 326 tcg_insn_unit *p = s->code_ptr; 327 memcpy(p, &v, sizeof(v)); 328 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 329 } 330 } 331 332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 333 uint64_t v) 334 { 335 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 336 *p = v; 337 } else { 338 memcpy(p, &v, sizeof(v)); 339 } 340 } 341 #endif 342 343 /* label relocation processing */ 344 345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 346 TCGLabel *l, intptr_t addend) 347 { 348 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 349 350 r->type = type; 351 r->ptr = code_ptr; 352 r->addend = addend; 353 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 354 } 355 356 static void tcg_out_label(TCGContext *s, TCGLabel *l) 357 { 358 
tcg_debug_assert(!l->has_value); 359 l->has_value = 1; 360 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 361 } 362 363 TCGLabel *gen_new_label(void) 364 { 365 TCGContext *s = tcg_ctx; 366 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 367 368 memset(l, 0, sizeof(TCGLabel)); 369 l->id = s->nb_labels++; 370 QSIMPLEQ_INIT(&l->branches); 371 QSIMPLEQ_INIT(&l->relocs); 372 373 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 374 375 return l; 376 } 377 378 static bool tcg_resolve_relocs(TCGContext *s) 379 { 380 TCGLabel *l; 381 382 QSIMPLEQ_FOREACH(l, &s->labels, next) { 383 TCGRelocation *r; 384 uintptr_t value = l->u.value; 385 386 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 387 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 388 return false; 389 } 390 } 391 } 392 return true; 393 } 394 395 static void set_jmp_reset_offset(TCGContext *s, int which) 396 { 397 /* 398 * We will check for overflow at the end of the opcode loop in 399 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 400 */ 401 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); 402 } 403 404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) 405 { 406 /* 407 * We will check for overflow at the end of the opcode loop in 408 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 409 */ 410 s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); 411 } 412 413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) 414 { 415 /* 416 * Return the read-execute version of the pointer, for the benefit 417 * of any pc-relative addressing mode. 418 */ 419 return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); 420 } 421 422 static int __attribute__((unused)) 423 tlb_mask_table_ofs(TCGContext *s, int which) 424 { 425 return (offsetof(CPUNegativeOffsetState, tlb.f[which]) - 426 sizeof(CPUNegativeOffsetState)); 427 } 428 429 /* Signal overflow, starting over with fewer guest insns. 
*/
/* Does not return: unwinds to the sigsetjmp on s->jmp_trans with value -2. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register (or argument slot, see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
463 */ 464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, 465 TCGType src_type, MemOp src_ext, TCGReg src) 466 { 467 switch (src_ext) { 468 case MO_UB: 469 tcg_out_ext8u(s, dst, src); 470 break; 471 case MO_SB: 472 tcg_out_ext8s(s, dst_type, dst, src); 473 break; 474 case MO_UW: 475 tcg_out_ext16u(s, dst, src); 476 break; 477 case MO_SW: 478 tcg_out_ext16s(s, dst_type, dst, src); 479 break; 480 case MO_UL: 481 case MO_SL: 482 if (dst_type == TCG_TYPE_I32) { 483 if (src_type == TCG_TYPE_I32) { 484 tcg_out_mov(s, TCG_TYPE_I32, dst, src); 485 } else { 486 tcg_out_extrl_i64_i32(s, dst, src); 487 } 488 } else if (src_type == TCG_TYPE_I32) { 489 if (src_ext & MO_SIGN) { 490 tcg_out_exts_i32_i64(s, dst, src); 491 } else { 492 tcg_out_extu_i32_i64(s, dst, src); 493 } 494 } else { 495 if (src_ext & MO_SIGN) { 496 tcg_out_ext32s(s, dst, src); 497 } else { 498 tcg_out_ext32u(s, dst, src); 499 } 500 } 501 break; 502 case MO_UQ: 503 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 504 if (dst_type == TCG_TYPE_I32) { 505 tcg_out_extrl_i64_i32(s, dst, src); 506 } else { 507 tcg_out_mov(s, TCG_TYPE_I64, dst, src); 508 } 509 break; 510 default: 511 g_assert_not_reached(); 512 } 513 } 514 515 /* Minor variations on a theme, using a structure. */ 516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, 517 TCGReg src) 518 { 519 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); 520 } 521 522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) 523 { 524 tcg_out_movext1_new_src(s, i, i->src); 525 } 526 527 /** 528 * tcg_out_movext2 -- move and extend two pair 529 * @s: tcg context 530 * @i1: first move description 531 * @i2: second move description 532 * @scratch: temporary register, or -1 for none 533 * 534 * As tcg_out_movext, for both @i1 and @i2, caring for overlap 535 * between the sources and destinations. 
*/

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* Writing i1's destination does not clobber i2's source: go in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* From here on i1->dst == src2, so i2 must read its source first. */
    if (i2->dst == src1) {
        /* Each destination is the other's source: a two-register swap. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No exchange available: park src1 in the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i2 first, so that src2 is consumed before i1 overwrites it. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If one move's destination is not a source of either of the other
     * two, emit it first and hand the remaining pair to tcg_out_movext2.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /*
             * Save src1, then emit i3 and i2 in that order so that each
             * source is read before the move that overwrites it.
             */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* Mirror image of the clockwise case: save src1, then i2, i3. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 * The entry is zeroed and appended to s->ldst_labels before it is returned.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
660 */ 661 662 typedef struct TCGLabelPoolData { 663 struct TCGLabelPoolData *next; 664 tcg_insn_unit *label; 665 intptr_t addend; 666 int rtype; 667 unsigned nlong; 668 tcg_target_ulong data[]; 669 } TCGLabelPoolData; 670 671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, 672 tcg_insn_unit *label, intptr_t addend) 673 { 674 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) 675 + sizeof(tcg_target_ulong) * nlong); 676 677 n->label = label; 678 n->addend = addend; 679 n->rtype = rtype; 680 n->nlong = nlong; 681 return n; 682 } 683 684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) 685 { 686 TCGLabelPoolData *i, **pp; 687 int nlong = n->nlong; 688 689 /* Insertion sort on the pool. */ 690 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { 691 if (nlong > i->nlong) { 692 break; 693 } 694 if (nlong < i->nlong) { 695 continue; 696 } 697 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { 698 break; 699 } 700 } 701 n->next = *pp; 702 *pp = n; 703 } 704 705 /* The "usual" for generic integer code. */ 706 __attribute__((unused)) 707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, 708 tcg_insn_unit *label, intptr_t addend) 709 { 710 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); 711 n->data[0] = d; 712 new_pool_insert(s, n); 713 } 714 715 /* For v64 or v128, depending on the host. */ 716 __attribute__((unused)) 717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, 718 intptr_t addend, tcg_target_ulong d0, 719 tcg_target_ulong d1) 720 { 721 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); 722 n->data[0] = d0; 723 n->data[1] = d1; 724 new_pool_insert(s, n); 725 } 726 727 /* For v128 or v256, depending on the host. 
*/ 728 __attribute__((unused)) 729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, 730 intptr_t addend, tcg_target_ulong d0, 731 tcg_target_ulong d1, tcg_target_ulong d2, 732 tcg_target_ulong d3) 733 { 734 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); 735 n->data[0] = d0; 736 n->data[1] = d1; 737 n->data[2] = d2; 738 n->data[3] = d3; 739 new_pool_insert(s, n); 740 } 741 742 /* For v256, for 32-bit host. */ 743 __attribute__((unused)) 744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, 745 intptr_t addend, tcg_target_ulong d0, 746 tcg_target_ulong d1, tcg_target_ulong d2, 747 tcg_target_ulong d3, tcg_target_ulong d4, 748 tcg_target_ulong d5, tcg_target_ulong d6, 749 tcg_target_ulong d7) 750 { 751 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); 752 n->data[0] = d0; 753 n->data[1] = d1; 754 n->data[2] = d2; 755 n->data[3] = d3; 756 n->data[4] = d4; 757 n->data[5] = d5; 758 n->data[6] = d6; 759 n->data[7] = d7; 760 new_pool_insert(s, n); 761 } 762 763 /* 764 * Generate TB finalization at the end of block 765 */ 766 767 static int tcg_out_ldst_finalize(TCGContext *s) 768 { 769 TCGLabelQemuLdst *lb; 770 771 /* qemu_ld/st slow paths */ 772 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 773 if (lb->is_ld 774 ? !tcg_out_qemu_ld_slow_path(s, lb) 775 : !tcg_out_qemu_st_slow_path(s, lb)) { 776 return -2; 777 } 778 779 /* 780 * Test for (pending) buffer overflow. The assumption is that any 781 * one operation beginning below the high water mark cannot overrun 782 * the buffer completely. Thus we can test for overflow after 783 * generating code without having to check during generation. 
784 */ 785 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 786 return -1; 787 } 788 } 789 return 0; 790 } 791 792 static int tcg_out_pool_finalize(TCGContext *s) 793 { 794 TCGLabelPoolData *p = s->pool_labels; 795 TCGLabelPoolData *l = NULL; 796 void *a; 797 798 if (p == NULL) { 799 return 0; 800 } 801 802 /* 803 * ??? Round up to qemu_icache_linesize, but then do not round 804 * again when allocating the next TranslationBlock structure. 805 */ 806 a = (void *)ROUND_UP((uintptr_t)s->code_ptr, 807 sizeof(tcg_target_ulong) * p->nlong); 808 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); 809 s->data_gen_ptr = a; 810 811 for (; p != NULL; p = p->next) { 812 size_t size = sizeof(tcg_target_ulong) * p->nlong; 813 uintptr_t value; 814 815 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 816 if (unlikely(a > s->code_gen_highwater)) { 817 return -1; 818 } 819 memcpy(a, p->data, size); 820 a += size; 821 l = p; 822 } 823 824 value = (uintptr_t)tcg_splitwx_to_rx(a) - size; 825 if (!patch_reloc(p->label, p->rtype, value, p->addend)) { 826 return -2; 827 } 828 } 829 830 s->code_ptr = a; 831 return 0; 832 } 833 834 #define C_PFX1(P, A) P##A 835 #define C_PFX2(P, A, B) P##A##_##B 836 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 837 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 838 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 839 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 840 841 /* Define an enumeration for the various combinations. 
*/

/*
 * First expansion of tcg-target-con-set.h: each C_On_Im(...) entry
 * becomes one enumerator, followed by a comma.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

/*
 * The two negative values are sentinels; the enumerators generated from
 * the header start at 0 and therefore index constraint_sets[] directly.
 */
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

/* Drop the definitions so the header can be expanded again differently. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum.
*/

/* Output/input operand counts plus one constraint string per operand. */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

/*
 * Second expansion: each entry becomes a TCGConstraintSet initializer
 * with the operands stringified.  The C_N* variants prefix "&" to the
 * outputs named by their N count.
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

/* Same header, same order as the enum, so the enum value is the index. */
static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def().
*/

/*
 * Third and final definition: identical to the enum expansion but
 * without the trailing comma, so that C_On_Im(...) can be used as an
 * expression of type TCGConstraintSetIndex.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
*/
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

/* Two-input op: register/register and register/immediate emitters. */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/*
 * Combined divide/remainder op.
 * NOTE(review): the "01" in out_rr01r presumably stands for two inputs
 * tied to outputs 0 and 1, with a4 the remaining input -- confirm
 * against the backend implementations.
 */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

/* One-input op: register to register. */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/* Subtraction: unlike TCGOutOpBinary the immediate is the first input. */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

/* The host backend is compiled as part of this translation unit. */
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode.
*/ 1025 static const TCGOutOp * const all_outop[NB_OPS] = { 1026 OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), 1027 OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), 1028 OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), 1029 OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), 1030 OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), 1031 OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), 1032 OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), 1033 OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), 1034 OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), 1035 OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), 1036 OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), 1037 OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), 1038 OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), 1039 OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), 1040 OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), 1041 OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), 1042 OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), 1043 OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), 1044 OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), 1045 OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), 1046 }; 1047 1048 #undef OUTOP 1049 1050 /* 1051 * All TCG threads except the parent (i.e. the one that called tcg_context_init 1052 * and registered the target's TCG globals) must register with this function 1053 * before initiating translation. 1054 * 1055 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 1056 * of tcg_region_init() for the reasoning behind this. 1057 * 1058 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in 1059 * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context 1060 * is not used anymore for translation once this function is called. 1061 * 1062 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that 1063 * iterates over the array (e.g. tcg_code_size() the same for both system/user 1064 * modes. 
1065 */ 1066 #ifdef CONFIG_USER_ONLY 1067 void tcg_register_thread(void) 1068 { 1069 tcg_ctx = &tcg_init_ctx; 1070 } 1071 #else 1072 void tcg_register_thread(void) 1073 { 1074 TCGContext *s = g_malloc(sizeof(*s)); 1075 unsigned int i, n; 1076 1077 *s = tcg_init_ctx; 1078 1079 /* Relink mem_base. */ 1080 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 1081 if (tcg_init_ctx.temps[i].mem_base) { 1082 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 1083 tcg_debug_assert(b >= 0 && b < n); 1084 s->temps[i].mem_base = &s->temps[b]; 1085 } 1086 } 1087 1088 /* Claim an entry in tcg_ctxs */ 1089 n = qatomic_fetch_inc(&tcg_cur_ctxs); 1090 g_assert(n < tcg_max_ctxs); 1091 qatomic_set(&tcg_ctxs[n], s); 1092 1093 if (n > 0) { 1094 tcg_region_initial_alloc(s); 1095 } 1096 1097 tcg_ctx = s; 1098 } 1099 #endif /* !CONFIG_USER_ONLY */ 1100 1101 /* pool based memory allocation */ 1102 void *tcg_malloc_internal(TCGContext *s, int size) 1103 { 1104 TCGPool *p; 1105 int pool_size; 1106 1107 if (size > TCG_POOL_CHUNK_SIZE) { 1108 /* big malloc: insert a new pool (XXX: could optimize) */ 1109 p = g_malloc(sizeof(TCGPool) + size); 1110 p->size = size; 1111 p->next = s->pool_first_large; 1112 s->pool_first_large = p; 1113 return p->data; 1114 } else { 1115 p = s->pool_current; 1116 if (!p) { 1117 p = s->pool_first; 1118 if (!p) 1119 goto new_pool; 1120 } else { 1121 if (!p->next) { 1122 new_pool: 1123 pool_size = TCG_POOL_CHUNK_SIZE; 1124 p = g_malloc(sizeof(TCGPool) + pool_size); 1125 p->size = pool_size; 1126 p->next = NULL; 1127 if (s->pool_current) { 1128 s->pool_current->next = p; 1129 } else { 1130 s->pool_first = p; 1131 } 1132 } else { 1133 p = p->next; 1134 } 1135 } 1136 } 1137 s->pool_current = p; 1138 s->pool_cur = p->data + size; 1139 s->pool_end = p->data + p->size; 1140 return p->data; 1141 } 1142 1143 void tcg_pool_reset(TCGContext *s) 1144 { 1145 TCGPool *p, *t; 1146 for (p = s->pool_first_large; p; p = t) { 1147 t = p->next; 1148 g_free(p); 1149 } 
1150 s->pool_first_large = NULL; 1151 s->pool_cur = s->pool_end = NULL; 1152 s->pool_current = NULL; 1153 } 1154 1155 /* 1156 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions, 1157 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N. 1158 * We only use these for layout in tcg_out_ld_helper_ret and 1159 * tcg_out_st_helper_args, and share them between several of 1160 * the helpers, with the end result that it's easier to build manually. 1161 */ 1162 1163 #if TCG_TARGET_REG_BITS == 32 1164 # define dh_typecode_ttl dh_typecode_i32 1165 #else 1166 # define dh_typecode_ttl dh_typecode_i64 1167 #endif 1168 1169 static TCGHelperInfo info_helper_ld32_mmu = { 1170 .flags = TCG_CALL_NO_WG, 1171 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1172 | dh_typemask(env, 1) 1173 | dh_typemask(i64, 2) /* uint64_t addr */ 1174 | dh_typemask(i32, 3) /* unsigned oi */ 1175 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1176 }; 1177 1178 static TCGHelperInfo info_helper_ld64_mmu = { 1179 .flags = TCG_CALL_NO_WG, 1180 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1181 | dh_typemask(env, 1) 1182 | dh_typemask(i64, 2) /* uint64_t addr */ 1183 | dh_typemask(i32, 3) /* unsigned oi */ 1184 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1185 }; 1186 1187 static TCGHelperInfo info_helper_ld128_mmu = { 1188 .flags = TCG_CALL_NO_WG, 1189 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1190 | dh_typemask(env, 1) 1191 | dh_typemask(i64, 2) /* uint64_t addr */ 1192 | dh_typemask(i32, 3) /* unsigned oi */ 1193 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1194 }; 1195 1196 static TCGHelperInfo info_helper_st32_mmu = { 1197 .flags = TCG_CALL_NO_WG, 1198 .typemask = dh_typemask(void, 0) 1199 | dh_typemask(env, 1) 1200 | dh_typemask(i64, 2) /* uint64_t addr */ 1201 | dh_typemask(i32, 3) /* uint32_t data */ 1202 | dh_typemask(i32, 4) /* unsigned oi */ 1203 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1204 }; 1205 1206 static TCGHelperInfo info_helper_st64_mmu = { 1207 
.flags = TCG_CALL_NO_WG, 1208 .typemask = dh_typemask(void, 0) 1209 | dh_typemask(env, 1) 1210 | dh_typemask(i64, 2) /* uint64_t addr */ 1211 | dh_typemask(i64, 3) /* uint64_t data */ 1212 | dh_typemask(i32, 4) /* unsigned oi */ 1213 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1214 }; 1215 1216 static TCGHelperInfo info_helper_st128_mmu = { 1217 .flags = TCG_CALL_NO_WG, 1218 .typemask = dh_typemask(void, 0) 1219 | dh_typemask(env, 1) 1220 | dh_typemask(i64, 2) /* uint64_t addr */ 1221 | dh_typemask(i128, 3) /* Int128 data */ 1222 | dh_typemask(i32, 4) /* unsigned oi */ 1223 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1224 }; 1225 1226 #ifdef CONFIG_TCG_INTERPRETER 1227 static ffi_type *typecode_to_ffi(int argmask) 1228 { 1229 /* 1230 * libffi does not support __int128_t, so we have forced Int128 1231 * to use the structure definition instead of the builtin type. 1232 */ 1233 static ffi_type *ffi_type_i128_elements[3] = { 1234 &ffi_type_uint64, 1235 &ffi_type_uint64, 1236 NULL 1237 }; 1238 static ffi_type ffi_type_i128 = { 1239 .size = 16, 1240 .alignment = __alignof__(Int128), 1241 .type = FFI_TYPE_STRUCT, 1242 .elements = ffi_type_i128_elements, 1243 }; 1244 1245 switch (argmask) { 1246 case dh_typecode_void: 1247 return &ffi_type_void; 1248 case dh_typecode_i32: 1249 return &ffi_type_uint32; 1250 case dh_typecode_s32: 1251 return &ffi_type_sint32; 1252 case dh_typecode_i64: 1253 return &ffi_type_uint64; 1254 case dh_typecode_s64: 1255 return &ffi_type_sint64; 1256 case dh_typecode_ptr: 1257 return &ffi_type_pointer; 1258 case dh_typecode_i128: 1259 return &ffi_type_i128; 1260 } 1261 g_assert_not_reached(); 1262 } 1263 1264 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1265 { 1266 unsigned typemask = info->typemask; 1267 struct { 1268 ffi_cif cif; 1269 ffi_type *args[]; 1270 } *ca; 1271 ffi_status status; 1272 int nargs; 1273 1274 /* Ignoring the return type, find the last non-zero field. 
*/ 1275 nargs = 32 - clz32(typemask >> 3); 1276 nargs = DIV_ROUND_UP(nargs, 3); 1277 assert(nargs <= MAX_CALL_IARGS); 1278 1279 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 1280 ca->cif.rtype = typecode_to_ffi(typemask & 7); 1281 ca->cif.nargs = nargs; 1282 1283 if (nargs != 0) { 1284 ca->cif.arg_types = ca->args; 1285 for (int j = 0; j < nargs; ++j) { 1286 int typecode = extract32(typemask, (j + 1) * 3, 3); 1287 ca->args[j] = typecode_to_ffi(typecode); 1288 } 1289 } 1290 1291 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 1292 ca->cif.rtype, ca->cif.arg_types); 1293 assert(status == FFI_OK); 1294 1295 return &ca->cif; 1296 } 1297 1298 #define HELPER_INFO_INIT(I) (&(I)->cif) 1299 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I) 1300 #else 1301 #define HELPER_INFO_INIT(I) (&(I)->init) 1302 #define HELPER_INFO_INIT_VAL(I) 1 1303 #endif /* CONFIG_TCG_INTERPRETER */ 1304 1305 static inline bool arg_slot_reg_p(unsigned arg_slot) 1306 { 1307 /* 1308 * Split the sizeof away from the comparison to avoid Werror from 1309 * "unsigned < 0 is always false", when iarg_regs is empty. 
1310 */ 1311 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs); 1312 return arg_slot < nreg; 1313 } 1314 1315 static inline int arg_slot_stk_ofs(unsigned arg_slot) 1316 { 1317 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1318 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 1319 1320 tcg_debug_assert(stk_slot < max); 1321 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long); 1322 } 1323 1324 typedef struct TCGCumulativeArgs { 1325 int arg_idx; /* tcg_gen_callN args[] */ 1326 int info_in_idx; /* TCGHelperInfo in[] */ 1327 int arg_slot; /* regs+stack slot */ 1328 int ref_slot; /* stack slots for references */ 1329 } TCGCumulativeArgs; 1330 1331 static void layout_arg_even(TCGCumulativeArgs *cum) 1332 { 1333 cum->arg_slot += cum->arg_slot & 1; 1334 } 1335 1336 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 1337 TCGCallArgumentKind kind) 1338 { 1339 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1340 1341 *loc = (TCGCallArgumentLoc){ 1342 .kind = kind, 1343 .arg_idx = cum->arg_idx, 1344 .arg_slot = cum->arg_slot, 1345 }; 1346 cum->info_in_idx++; 1347 cum->arg_slot++; 1348 } 1349 1350 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 1351 TCGHelperInfo *info, int n) 1352 { 1353 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1354 1355 for (int i = 0; i < n; ++i) { 1356 /* Layout all using the same arg_idx, adjusting the subindex. */ 1357 loc[i] = (TCGCallArgumentLoc){ 1358 .kind = TCG_CALL_ARG_NORMAL, 1359 .arg_idx = cum->arg_idx, 1360 .tmp_subindex = i, 1361 .arg_slot = cum->arg_slot + i, 1362 }; 1363 } 1364 cum->info_in_idx += n; 1365 cum->arg_slot += n; 1366 } 1367 1368 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 1369 { 1370 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1371 int n = 128 / TCG_TARGET_REG_BITS; 1372 1373 /* The first subindex carries the pointer. 
*/ 1374 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 1375 1376 /* 1377 * The callee is allowed to clobber memory associated with 1378 * structure pass by-reference. Therefore we must make copies. 1379 * Allocate space from "ref_slot", which will be adjusted to 1380 * follow the parameters on the stack. 1381 */ 1382 loc[0].ref_slot = cum->ref_slot; 1383 1384 /* 1385 * Subsequent words also go into the reference slot, but 1386 * do not accumulate into the regular arguments. 1387 */ 1388 for (int i = 1; i < n; ++i) { 1389 loc[i] = (TCGCallArgumentLoc){ 1390 .kind = TCG_CALL_ARG_BY_REF_N, 1391 .arg_idx = cum->arg_idx, 1392 .tmp_subindex = i, 1393 .ref_slot = cum->ref_slot + i, 1394 }; 1395 } 1396 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1397 cum->ref_slot += n; 1398 } 1399 1400 static void init_call_layout(TCGHelperInfo *info) 1401 { 1402 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1403 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1404 unsigned typemask = info->typemask; 1405 unsigned typecode; 1406 TCGCumulativeArgs cum = { }; 1407 1408 /* 1409 * Parse and place any function return value. 1410 */ 1411 typecode = typemask & 7; 1412 switch (typecode) { 1413 case dh_typecode_void: 1414 info->nr_out = 0; 1415 break; 1416 case dh_typecode_i32: 1417 case dh_typecode_s32: 1418 case dh_typecode_ptr: 1419 info->nr_out = 1; 1420 info->out_kind = TCG_CALL_RET_NORMAL; 1421 break; 1422 case dh_typecode_i64: 1423 case dh_typecode_s64: 1424 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1425 info->out_kind = TCG_CALL_RET_NORMAL; 1426 /* Query the last register now to trigger any assert early. 
*/ 1427 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1428 break; 1429 case dh_typecode_i128: 1430 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1431 info->out_kind = TCG_TARGET_CALL_RET_I128; 1432 switch (TCG_TARGET_CALL_RET_I128) { 1433 case TCG_CALL_RET_NORMAL: 1434 /* Query the last register now to trigger any assert early. */ 1435 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1436 break; 1437 case TCG_CALL_RET_BY_VEC: 1438 /* Query the single register now to trigger any assert early. */ 1439 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1440 break; 1441 case TCG_CALL_RET_BY_REF: 1442 /* 1443 * Allocate the first argument to the output. 1444 * We don't need to store this anywhere, just make it 1445 * unavailable for use in the input loop below. 1446 */ 1447 cum.arg_slot = 1; 1448 break; 1449 default: 1450 qemu_build_not_reached(); 1451 } 1452 break; 1453 default: 1454 g_assert_not_reached(); 1455 } 1456 1457 /* 1458 * Parse and place function arguments. 1459 */ 1460 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1461 TCGCallArgumentKind kind; 1462 TCGType type; 1463 1464 typecode = typemask & 7; 1465 switch (typecode) { 1466 case dh_typecode_i32: 1467 case dh_typecode_s32: 1468 type = TCG_TYPE_I32; 1469 break; 1470 case dh_typecode_i64: 1471 case dh_typecode_s64: 1472 type = TCG_TYPE_I64; 1473 break; 1474 case dh_typecode_ptr: 1475 type = TCG_TYPE_PTR; 1476 break; 1477 case dh_typecode_i128: 1478 type = TCG_TYPE_I128; 1479 break; 1480 default: 1481 g_assert_not_reached(); 1482 } 1483 1484 switch (type) { 1485 case TCG_TYPE_I32: 1486 switch (TCG_TARGET_CALL_ARG_I32) { 1487 case TCG_CALL_ARG_EVEN: 1488 layout_arg_even(&cum); 1489 /* fall through */ 1490 case TCG_CALL_ARG_NORMAL: 1491 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1492 break; 1493 case TCG_CALL_ARG_EXTEND: 1494 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1495 layout_arg_1(&cum, info, kind); 1496 break; 1497 default: 1498 qemu_build_not_reached(); 
1499 } 1500 break; 1501 1502 case TCG_TYPE_I64: 1503 switch (TCG_TARGET_CALL_ARG_I64) { 1504 case TCG_CALL_ARG_EVEN: 1505 layout_arg_even(&cum); 1506 /* fall through */ 1507 case TCG_CALL_ARG_NORMAL: 1508 if (TCG_TARGET_REG_BITS == 32) { 1509 layout_arg_normal_n(&cum, info, 2); 1510 } else { 1511 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1512 } 1513 break; 1514 default: 1515 qemu_build_not_reached(); 1516 } 1517 break; 1518 1519 case TCG_TYPE_I128: 1520 switch (TCG_TARGET_CALL_ARG_I128) { 1521 case TCG_CALL_ARG_EVEN: 1522 layout_arg_even(&cum); 1523 /* fall through */ 1524 case TCG_CALL_ARG_NORMAL: 1525 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1526 break; 1527 case TCG_CALL_ARG_BY_REF: 1528 layout_arg_by_ref(&cum, info); 1529 break; 1530 default: 1531 qemu_build_not_reached(); 1532 } 1533 break; 1534 1535 default: 1536 g_assert_not_reached(); 1537 } 1538 } 1539 info->nr_in = cum.info_in_idx; 1540 1541 /* Validate that we didn't overrun the input array. */ 1542 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1543 /* Validate the backend has enough argument space. */ 1544 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1545 1546 /* 1547 * Relocate the "ref_slot" area to the end of the parameters. 1548 * Minimizing this stack offset helps code size for x86, 1549 * which has a signed 8-bit offset encoding. 
1550 */ 1551 if (cum.ref_slot != 0) { 1552 int ref_base = 0; 1553 1554 if (cum.arg_slot > max_reg_slots) { 1555 int align = __alignof(Int128) / sizeof(tcg_target_long); 1556 1557 ref_base = cum.arg_slot - max_reg_slots; 1558 if (align > 1) { 1559 ref_base = ROUND_UP(ref_base, align); 1560 } 1561 } 1562 assert(ref_base + cum.ref_slot <= max_stk_slots); 1563 ref_base += max_reg_slots; 1564 1565 if (ref_base != 0) { 1566 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1567 TCGCallArgumentLoc *loc = &info->in[i]; 1568 switch (loc->kind) { 1569 case TCG_CALL_ARG_BY_REF: 1570 case TCG_CALL_ARG_BY_REF_N: 1571 loc->ref_slot += ref_base; 1572 break; 1573 default: 1574 break; 1575 } 1576 } 1577 } 1578 } 1579 } 1580 1581 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1582 static void process_constraint_sets(void); 1583 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1584 TCGReg reg, const char *name); 1585 1586 static void tcg_context_init(unsigned max_threads) 1587 { 1588 TCGContext *s = &tcg_init_ctx; 1589 int n, i; 1590 TCGTemp *ts; 1591 1592 memset(s, 0, sizeof(*s)); 1593 s->nb_globals = 0; 1594 1595 init_call_layout(&info_helper_ld32_mmu); 1596 init_call_layout(&info_helper_ld64_mmu); 1597 init_call_layout(&info_helper_ld128_mmu); 1598 init_call_layout(&info_helper_st32_mmu); 1599 init_call_layout(&info_helper_st64_mmu); 1600 init_call_layout(&info_helper_st128_mmu); 1601 1602 tcg_target_init(s); 1603 process_constraint_sets(); 1604 1605 /* Reverse the order of the saved registers, assuming they're all at 1606 the start of tcg_target_reg_alloc_order. 
*/ 1607 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1608 int r = tcg_target_reg_alloc_order[n]; 1609 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1610 break; 1611 } 1612 } 1613 for (i = 0; i < n; ++i) { 1614 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1615 } 1616 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1617 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1618 } 1619 1620 tcg_ctx = s; 1621 /* 1622 * In user-mode we simply share the init context among threads, since we 1623 * use a single region. See the documentation tcg_region_init() for the 1624 * reasoning behind this. 1625 * In system-mode we will have at most max_threads TCG threads. 1626 */ 1627 #ifdef CONFIG_USER_ONLY 1628 tcg_ctxs = &tcg_ctx; 1629 tcg_cur_ctxs = 1; 1630 tcg_max_ctxs = 1; 1631 #else 1632 tcg_max_ctxs = max_threads; 1633 tcg_ctxs = g_new0(TCGContext *, max_threads); 1634 #endif 1635 1636 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1637 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1638 tcg_env = temp_tcgv_ptr(ts); 1639 } 1640 1641 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1642 { 1643 tcg_context_init(max_threads); 1644 tcg_region_init(tb_size, splitwx, max_threads); 1645 } 1646 1647 /* 1648 * Allocate TBs right before their corresponding translated code, making 1649 * sure that TBs and code are on different cache lines. 
1650 */ 1651 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1652 { 1653 uintptr_t align = qemu_icache_linesize; 1654 TranslationBlock *tb; 1655 void *next; 1656 1657 retry: 1658 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1659 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1660 1661 if (unlikely(next > s->code_gen_highwater)) { 1662 if (tcg_region_alloc(s)) { 1663 return NULL; 1664 } 1665 goto retry; 1666 } 1667 qatomic_set(&s->code_gen_ptr, next); 1668 return tb; 1669 } 1670 1671 void tcg_prologue_init(void) 1672 { 1673 TCGContext *s = tcg_ctx; 1674 size_t prologue_size; 1675 1676 s->code_ptr = s->code_gen_ptr; 1677 s->code_buf = s->code_gen_ptr; 1678 s->data_gen_ptr = NULL; 1679 1680 #ifndef CONFIG_TCG_INTERPRETER 1681 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1682 #endif 1683 1684 s->pool_labels = NULL; 1685 1686 qemu_thread_jit_write(); 1687 /* Generate the prologue. */ 1688 tcg_target_qemu_prologue(s); 1689 1690 /* Allow the prologue to put e.g. guest_base into a pool entry. 
*/ 1691 { 1692 int result = tcg_out_pool_finalize(s); 1693 tcg_debug_assert(result == 0); 1694 } 1695 1696 prologue_size = tcg_current_code_size(s); 1697 perf_report_prologue(s->code_gen_ptr, prologue_size); 1698 1699 #ifndef CONFIG_TCG_INTERPRETER 1700 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1701 (uintptr_t)s->code_buf, prologue_size); 1702 #endif 1703 1704 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1705 FILE *logfile = qemu_log_trylock(); 1706 if (logfile) { 1707 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1708 if (s->data_gen_ptr) { 1709 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1710 size_t data_size = prologue_size - code_size; 1711 size_t i; 1712 1713 disas(logfile, s->code_gen_ptr, code_size); 1714 1715 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1716 if (sizeof(tcg_target_ulong) == 8) { 1717 fprintf(logfile, 1718 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1719 (uintptr_t)s->data_gen_ptr + i, 1720 *(uint64_t *)(s->data_gen_ptr + i)); 1721 } else { 1722 fprintf(logfile, 1723 "0x%08" PRIxPTR ": .long 0x%08x\n", 1724 (uintptr_t)s->data_gen_ptr + i, 1725 *(uint32_t *)(s->data_gen_ptr + i)); 1726 } 1727 } 1728 } else { 1729 disas(logfile, s->code_gen_ptr, prologue_size); 1730 } 1731 fprintf(logfile, "\n"); 1732 qemu_log_unlock(logfile); 1733 } 1734 } 1735 1736 #ifndef CONFIG_TCG_INTERPRETER 1737 /* 1738 * Assert that goto_ptr is implemented completely, setting an epilogue. 1739 * For tci, we use NULL as the signal to return from the interpreter, 1740 * so skip this check. 1741 */ 1742 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1743 #endif 1744 1745 tcg_region_prologue_set(s); 1746 } 1747 1748 void tcg_func_start(TCGContext *s) 1749 { 1750 tcg_pool_reset(s); 1751 s->nb_temps = s->nb_globals; 1752 1753 /* No temps have been previously allocated for size or locality. */ 1754 tcg_temp_ebb_reset_freed(s); 1755 1756 /* No constant temps have been previously allocated. 
*/ 1757 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1758 if (s->const_table[i]) { 1759 g_hash_table_remove_all(s->const_table[i]); 1760 } 1761 } 1762 1763 s->nb_ops = 0; 1764 s->nb_labels = 0; 1765 s->current_frame_offset = s->frame_start; 1766 1767 #ifdef CONFIG_DEBUG_TCG 1768 s->goto_tb_issue_mask = 0; 1769 #endif 1770 1771 QTAILQ_INIT(&s->ops); 1772 QTAILQ_INIT(&s->free_ops); 1773 s->emit_before_op = NULL; 1774 QSIMPLEQ_INIT(&s->labels); 1775 1776 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1777 tcg_debug_assert(s->insn_start_words > 0); 1778 } 1779 1780 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1781 { 1782 int n = s->nb_temps++; 1783 1784 if (n >= TCG_MAX_TEMPS) { 1785 tcg_raise_tb_overflow(s); 1786 } 1787 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1788 } 1789 1790 static TCGTemp *tcg_global_alloc(TCGContext *s) 1791 { 1792 TCGTemp *ts; 1793 1794 tcg_debug_assert(s->nb_globals == s->nb_temps); 1795 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1796 s->nb_globals++; 1797 ts = tcg_temp_alloc(s); 1798 ts->kind = TEMP_GLOBAL; 1799 1800 return ts; 1801 } 1802 1803 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1804 TCGReg reg, const char *name) 1805 { 1806 TCGTemp *ts; 1807 1808 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1809 1810 ts = tcg_global_alloc(s); 1811 ts->base_type = type; 1812 ts->type = type; 1813 ts->kind = TEMP_FIXED; 1814 ts->reg = reg; 1815 ts->name = name; 1816 tcg_regset_set_reg(s->reserved_regs, reg); 1817 1818 return ts; 1819 } 1820 1821 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1822 { 1823 s->frame_start = start; 1824 s->frame_end = start + size; 1825 s->frame_temp 1826 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1827 } 1828 1829 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1830 const char *name, TCGType type) 1831 { 1832 TCGContext *s = tcg_ctx; 1833 TCGTemp *base_ts = tcgv_ptr_temp(base); 1834 
TCGTemp *ts = tcg_global_alloc(s); 1835 int indirect_reg = 0; 1836 1837 switch (base_ts->kind) { 1838 case TEMP_FIXED: 1839 break; 1840 case TEMP_GLOBAL: 1841 /* We do not support double-indirect registers. */ 1842 tcg_debug_assert(!base_ts->indirect_reg); 1843 base_ts->indirect_base = 1; 1844 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1845 ? 2 : 1); 1846 indirect_reg = 1; 1847 break; 1848 default: 1849 g_assert_not_reached(); 1850 } 1851 1852 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1853 TCGTemp *ts2 = tcg_global_alloc(s); 1854 char buf[64]; 1855 1856 ts->base_type = TCG_TYPE_I64; 1857 ts->type = TCG_TYPE_I32; 1858 ts->indirect_reg = indirect_reg; 1859 ts->mem_allocated = 1; 1860 ts->mem_base = base_ts; 1861 ts->mem_offset = offset; 1862 pstrcpy(buf, sizeof(buf), name); 1863 pstrcat(buf, sizeof(buf), "_0"); 1864 ts->name = strdup(buf); 1865 1866 tcg_debug_assert(ts2 == ts + 1); 1867 ts2->base_type = TCG_TYPE_I64; 1868 ts2->type = TCG_TYPE_I32; 1869 ts2->indirect_reg = indirect_reg; 1870 ts2->mem_allocated = 1; 1871 ts2->mem_base = base_ts; 1872 ts2->mem_offset = offset + 4; 1873 ts2->temp_subindex = 1; 1874 pstrcpy(buf, sizeof(buf), name); 1875 pstrcat(buf, sizeof(buf), "_1"); 1876 ts2->name = strdup(buf); 1877 } else { 1878 ts->base_type = type; 1879 ts->type = type; 1880 ts->indirect_reg = indirect_reg; 1881 ts->mem_allocated = 1; 1882 ts->mem_base = base_ts; 1883 ts->mem_offset = offset; 1884 ts->name = name; 1885 } 1886 return ts; 1887 } 1888 1889 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1890 { 1891 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1892 return temp_tcgv_i32(ts); 1893 } 1894 1895 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1896 { 1897 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1898 return temp_tcgv_i64(ts); 1899 } 1900 1901 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t 
off, const char *name) 1902 { 1903 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1904 return temp_tcgv_ptr(ts); 1905 } 1906 1907 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1908 { 1909 TCGContext *s = tcg_ctx; 1910 TCGTemp *ts; 1911 int n; 1912 1913 if (kind == TEMP_EBB) { 1914 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1915 1916 if (idx < TCG_MAX_TEMPS) { 1917 /* There is already an available temp with the right type. */ 1918 clear_bit(idx, s->free_temps[type].l); 1919 1920 ts = &s->temps[idx]; 1921 ts->temp_allocated = 1; 1922 tcg_debug_assert(ts->base_type == type); 1923 tcg_debug_assert(ts->kind == kind); 1924 return ts; 1925 } 1926 } else { 1927 tcg_debug_assert(kind == TEMP_TB); 1928 } 1929 1930 switch (type) { 1931 case TCG_TYPE_I32: 1932 case TCG_TYPE_V64: 1933 case TCG_TYPE_V128: 1934 case TCG_TYPE_V256: 1935 n = 1; 1936 break; 1937 case TCG_TYPE_I64: 1938 n = 64 / TCG_TARGET_REG_BITS; 1939 break; 1940 case TCG_TYPE_I128: 1941 n = 128 / TCG_TARGET_REG_BITS; 1942 break; 1943 default: 1944 g_assert_not_reached(); 1945 } 1946 1947 ts = tcg_temp_alloc(s); 1948 ts->base_type = type; 1949 ts->temp_allocated = 1; 1950 ts->kind = kind; 1951 1952 if (n == 1) { 1953 ts->type = type; 1954 } else { 1955 ts->type = TCG_TYPE_REG; 1956 1957 for (int i = 1; i < n; ++i) { 1958 TCGTemp *ts2 = tcg_temp_alloc(s); 1959 1960 tcg_debug_assert(ts2 == ts + i); 1961 ts2->base_type = type; 1962 ts2->type = TCG_TYPE_REG; 1963 ts2->temp_allocated = 1; 1964 ts2->temp_subindex = i; 1965 ts2->kind = kind; 1966 } 1967 } 1968 return ts; 1969 } 1970 1971 TCGv_i32 tcg_temp_new_i32(void) 1972 { 1973 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1974 } 1975 1976 TCGv_i32 tcg_temp_ebb_new_i32(void) 1977 { 1978 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1979 } 1980 1981 TCGv_i64 tcg_temp_new_i64(void) 1982 { 1983 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 
1984 } 1985 1986 TCGv_i64 tcg_temp_ebb_new_i64(void) 1987 { 1988 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 1989 } 1990 1991 TCGv_ptr tcg_temp_new_ptr(void) 1992 { 1993 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 1994 } 1995 1996 TCGv_ptr tcg_temp_ebb_new_ptr(void) 1997 { 1998 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 1999 } 2000 2001 TCGv_i128 tcg_temp_new_i128(void) 2002 { 2003 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2004 } 2005 2006 TCGv_i128 tcg_temp_ebb_new_i128(void) 2007 { 2008 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2009 } 2010 2011 TCGv_vec tcg_temp_new_vec(TCGType type) 2012 { 2013 TCGTemp *t; 2014 2015 #ifdef CONFIG_DEBUG_TCG 2016 switch (type) { 2017 case TCG_TYPE_V64: 2018 assert(TCG_TARGET_HAS_v64); 2019 break; 2020 case TCG_TYPE_V128: 2021 assert(TCG_TARGET_HAS_v128); 2022 break; 2023 case TCG_TYPE_V256: 2024 assert(TCG_TARGET_HAS_v256); 2025 break; 2026 default: 2027 g_assert_not_reached(); 2028 } 2029 #endif 2030 2031 t = tcg_temp_new_internal(type, TEMP_EBB); 2032 return temp_tcgv_vec(t); 2033 } 2034 2035 /* Create a new temp of the same type as an existing temp. */ 2036 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2037 { 2038 TCGTemp *t = tcgv_vec_temp(match); 2039 2040 tcg_debug_assert(t->temp_allocated != 0); 2041 2042 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2043 return temp_tcgv_vec(t); 2044 } 2045 2046 void tcg_temp_free_internal(TCGTemp *ts) 2047 { 2048 TCGContext *s = tcg_ctx; 2049 2050 switch (ts->kind) { 2051 case TEMP_CONST: 2052 case TEMP_TB: 2053 /* Silently ignore free. */ 2054 break; 2055 case TEMP_EBB: 2056 tcg_debug_assert(ts->temp_allocated != 0); 2057 ts->temp_allocated = 0; 2058 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2059 break; 2060 default: 2061 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
*/ 2062 g_assert_not_reached(); 2063 } 2064 } 2065 2066 void tcg_temp_free_i32(TCGv_i32 arg) 2067 { 2068 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2069 } 2070 2071 void tcg_temp_free_i64(TCGv_i64 arg) 2072 { 2073 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2074 } 2075 2076 void tcg_temp_free_i128(TCGv_i128 arg) 2077 { 2078 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2079 } 2080 2081 void tcg_temp_free_ptr(TCGv_ptr arg) 2082 { 2083 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2084 } 2085 2086 void tcg_temp_free_vec(TCGv_vec arg) 2087 { 2088 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2089 } 2090 2091 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2092 { 2093 TCGContext *s = tcg_ctx; 2094 GHashTable *h = s->const_table[type]; 2095 TCGTemp *ts; 2096 2097 if (h == NULL) { 2098 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2099 s->const_table[type] = h; 2100 } 2101 2102 ts = g_hash_table_lookup(h, &val); 2103 if (ts == NULL) { 2104 int64_t *val_ptr; 2105 2106 ts = tcg_temp_alloc(s); 2107 2108 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2109 TCGTemp *ts2 = tcg_temp_alloc(s); 2110 2111 tcg_debug_assert(ts2 == ts + 1); 2112 2113 ts->base_type = TCG_TYPE_I64; 2114 ts->type = TCG_TYPE_I32; 2115 ts->kind = TEMP_CONST; 2116 ts->temp_allocated = 1; 2117 2118 ts2->base_type = TCG_TYPE_I64; 2119 ts2->type = TCG_TYPE_I32; 2120 ts2->kind = TEMP_CONST; 2121 ts2->temp_allocated = 1; 2122 ts2->temp_subindex = 1; 2123 2124 /* 2125 * Retain the full value of the 64-bit constant in the low 2126 * part, so that the hash table works. Actual uses will 2127 * truncate the value to the low part. 
2128 */ 2129 ts[HOST_BIG_ENDIAN].val = val; 2130 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2131 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2132 } else { 2133 ts->base_type = type; 2134 ts->type = type; 2135 ts->kind = TEMP_CONST; 2136 ts->temp_allocated = 1; 2137 ts->val = val; 2138 val_ptr = &ts->val; 2139 } 2140 g_hash_table_insert(h, val_ptr, ts); 2141 } 2142 2143 return ts; 2144 } 2145 2146 TCGv_i32 tcg_constant_i32(int32_t val) 2147 { 2148 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2149 } 2150 2151 TCGv_i64 tcg_constant_i64(int64_t val) 2152 { 2153 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2154 } 2155 2156 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2157 { 2158 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2159 } 2160 2161 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2162 { 2163 val = dup_const(vece, val); 2164 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2165 } 2166 2167 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2168 { 2169 TCGTemp *t = tcgv_vec_temp(match); 2170 2171 tcg_debug_assert(t->temp_allocated != 0); 2172 return tcg_constant_vec(t->base_type, vece, val); 2173 } 2174 2175 #ifdef CONFIG_DEBUG_TCG 2176 size_t temp_idx(TCGTemp *ts) 2177 { 2178 ptrdiff_t n = ts - tcg_ctx->temps; 2179 assert(n >= 0 && n < tcg_ctx->nb_temps); 2180 return n; 2181 } 2182 2183 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2184 { 2185 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2186 2187 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2188 assert(o % sizeof(TCGTemp) == 0); 2189 2190 return (void *)tcg_ctx + (uintptr_t)v; 2191 } 2192 #endif /* CONFIG_DEBUG_TCG */ 2193 2194 /* 2195 * Return true if OP may appear in the opcode stream with TYPE. 2196 * Test the runtime variable that controls each opcode. 
/*
 * Return true if OP may appear in the opcode stream with TYPE.
 * Test the runtime variable that controls each opcode.
 *
 * @op:    opcode being queried
 * @type:  operand type; only consulted for opcodes whose answer depends
 *         on it (the type-generic integer ops, the vector ops and the
 *         generic opcodes handled in the default case)
 * @flags: forwarded unchanged to the opcode's dynamic_constraint hook
 */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /* First decide whether TYPE itself is usable on this host. */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    switch (op) {
    /* Opcodes reported as supported regardless of TYPE. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Type-generic integer ops: supported exactly when TYPE is. */
    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* 32-bit ops reported as always supported. */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* 32-bit ops gated by a per-target TCG_TARGET_HAS_* value. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops: reported only for 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops: reported only for 64-bit hosts. */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* 64-bit ops gated by a per-target TCG_TARGET_HAS_* value. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;

    /* Vector ops that need nothing beyond the vector type itself. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    /* Vector ops that need the type plus one more condition. */
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Opcodes numbered below INDEX_op_last_generic are answered from
         * their all_outop[] entry: supported when the (static or dynamic)
         * constraint set index is non-negative.
         */
        if (op < INDEX_op_last_generic) {
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        /* Any other valid opcode not listed above is reported as supported. */
        tcg_debug_assert(op < NB_OPS);
        return true;

    case INDEX_op_last_generic:
        /* Marker value, not a real opcode. */
        g_assert_not_reached();
    }
}
32 : 64); 2463 2464 tcg_debug_assert(ofs < width); 2465 tcg_debug_assert(len > 0); 2466 tcg_debug_assert(len <= width - ofs); 2467 2468 return TCG_TARGET_deposit_valid(type, ofs, len); 2469 } 2470 2471 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2472 2473 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2474 TCGTemp *ret, TCGTemp **args) 2475 { 2476 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2477 int n_extend = 0; 2478 TCGOp *op; 2479 int i, n, pi = 0, total_args; 2480 2481 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2482 init_call_layout(info); 2483 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2484 } 2485 2486 total_args = info->nr_out + info->nr_in + 2; 2487 op = tcg_op_alloc(INDEX_op_call, total_args); 2488 2489 #ifdef CONFIG_PLUGIN 2490 /* Flag helpers that may affect guest state */ 2491 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2492 tcg_ctx->plugin_insn->calls_helpers = true; 2493 } 2494 #endif 2495 2496 TCGOP_CALLO(op) = n = info->nr_out; 2497 switch (n) { 2498 case 0: 2499 tcg_debug_assert(ret == NULL); 2500 break; 2501 case 1: 2502 tcg_debug_assert(ret != NULL); 2503 op->args[pi++] = temp_arg(ret); 2504 break; 2505 case 2: 2506 case 4: 2507 tcg_debug_assert(ret != NULL); 2508 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2509 tcg_debug_assert(ret->temp_subindex == 0); 2510 for (i = 0; i < n; ++i) { 2511 op->args[pi++] = temp_arg(ret + i); 2512 } 2513 break; 2514 default: 2515 g_assert_not_reached(); 2516 } 2517 2518 TCGOP_CALLI(op) = n = info->nr_in; 2519 for (i = 0; i < n; i++) { 2520 const TCGCallArgumentLoc *loc = &info->in[i]; 2521 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2522 2523 switch (loc->kind) { 2524 case TCG_CALL_ARG_NORMAL: 2525 case TCG_CALL_ARG_BY_REF: 2526 case TCG_CALL_ARG_BY_REF_N: 2527 op->args[pi++] = temp_arg(ts); 2528 break; 2529 2530 case TCG_CALL_ARG_EXTEND_U: 2531 case TCG_CALL_ARG_EXTEND_S: 2532 { 2533 TCGv_i64 temp 
= tcg_temp_ebb_new_i64(); 2534 TCGv_i32 orig = temp_tcgv_i32(ts); 2535 2536 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2537 tcg_gen_ext_i32_i64(temp, orig); 2538 } else { 2539 tcg_gen_extu_i32_i64(temp, orig); 2540 } 2541 op->args[pi++] = tcgv_i64_arg(temp); 2542 extend_free[n_extend++] = temp; 2543 } 2544 break; 2545 2546 default: 2547 g_assert_not_reached(); 2548 } 2549 } 2550 op->args[pi++] = (uintptr_t)func; 2551 op->args[pi++] = (uintptr_t)info; 2552 tcg_debug_assert(pi == total_args); 2553 2554 if (tcg_ctx->emit_before_op) { 2555 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2556 } else { 2557 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2558 } 2559 2560 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2561 for (i = 0; i < n_extend; ++i) { 2562 tcg_temp_free_i64(extend_free[i]); 2563 } 2564 } 2565 2566 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2567 { 2568 tcg_gen_callN(func, info, ret, NULL); 2569 } 2570 2571 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2572 { 2573 tcg_gen_callN(func, info, ret, &t1); 2574 } 2575 2576 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2577 TCGTemp *t1, TCGTemp *t2) 2578 { 2579 TCGTemp *args[2] = { t1, t2 }; 2580 tcg_gen_callN(func, info, ret, args); 2581 } 2582 2583 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2584 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2585 { 2586 TCGTemp *args[3] = { t1, t2, t3 }; 2587 tcg_gen_callN(func, info, ret, args); 2588 } 2589 2590 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, 2591 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2592 { 2593 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2594 tcg_gen_callN(func, info, ret, args); 2595 } 2596 2597 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2598 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2599 { 2600 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2601 tcg_gen_callN(func, info, ret, args); 
2602 } 2603 2604 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2605 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2606 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2607 { 2608 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2609 tcg_gen_callN(func, info, ret, args); 2610 } 2611 2612 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2613 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2614 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2615 { 2616 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2617 tcg_gen_callN(func, info, ret, args); 2618 } 2619 2620 static void tcg_reg_alloc_start(TCGContext *s) 2621 { 2622 int i, n; 2623 2624 for (i = 0, n = s->nb_temps; i < n; i++) { 2625 TCGTemp *ts = &s->temps[i]; 2626 TCGTempVal val = TEMP_VAL_MEM; 2627 2628 switch (ts->kind) { 2629 case TEMP_CONST: 2630 val = TEMP_VAL_CONST; 2631 break; 2632 case TEMP_FIXED: 2633 val = TEMP_VAL_REG; 2634 break; 2635 case TEMP_GLOBAL: 2636 break; 2637 case TEMP_EBB: 2638 val = TEMP_VAL_DEAD; 2639 /* fall through */ 2640 case TEMP_TB: 2641 ts->mem_allocated = 0; 2642 break; 2643 default: 2644 g_assert_not_reached(); 2645 } 2646 ts->val_type = val; 2647 } 2648 2649 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2650 } 2651 2652 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2653 TCGTemp *ts) 2654 { 2655 int idx = temp_idx(ts); 2656 2657 switch (ts->kind) { 2658 case TEMP_FIXED: 2659 case TEMP_GLOBAL: 2660 pstrcpy(buf, buf_size, ts->name); 2661 break; 2662 case TEMP_TB: 2663 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2664 break; 2665 case TEMP_EBB: 2666 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2667 break; 2668 case TEMP_CONST: 2669 switch (ts->type) { 2670 case TCG_TYPE_I32: 2671 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2672 break; 2673 #if TCG_TARGET_REG_BITS > 32 2674 case TCG_TYPE_I64: 2675 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2676 break; 2677 #endif 2678 case TCG_TYPE_V64: 2679 case 
/*
 * Format the temp referenced by ARG into BUF; thin wrapper that converts
 * the TCGArg to a TCGTemp and defers to tcg_get_arg_str_ptr().
 */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Printable names for TCGCond values, indexed by the condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};

/*
 * Printable names for the size/sign/endianness part of a MemOp, indexed
 * by (memop & (MO_BSWAP | MO_SSIZE)).  Combinations without an entry are
 * NULL.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};

/*
 * Printable prefixes for the alignment field of a MemOp, indexed by
 * ((memop & MO_AMASK) >> MO_ASHIFT).
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/*
 * Printable prefixes for the atomicity field of a MemOp, indexed by
 * ((memop & MO_ATOM_MASK) >> MO_ATOM_SHIFT).  MO_ATOM_IFALIGN maps to
 * the empty string, so it prints nothing.
 */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
/*
 * Printable names for bswap flag combinations, indexed by the flag value.
 * Rows without an initializer are empty strings (this is an array of
 * char arrays, not of pointers).
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

#ifdef CONFIG_PLUGIN
/* Printable names for the first constant argument of INDEX_op_plugin_cb. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif

/*
 * True when D has at most one bit set.  Note that this is also true for
 * an empty set (D == 0).
 */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

/*
 * Index of the lowest set bit of D, using the 32-bit or 64-bit
 * count-trailing-zeros helper depending on TCG_TARGET_NB_REGS.
 */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

/*
 * Write a textual listing of every op queued in S to F, one op per line.
 *
 * @s:          context whose op list is dumped
 * @f:          output stream
 * @have_prefs: when true, also print the output register preferences
 *
 * For each op the opcode name and its arguments are printed first, with
 * symbolic names for conditions, memory operations, bswap flags, labels
 * and barriers where a name is known.  If liveness data is present
 * (op->life) or HAVE_PREFS is set, the line is padded to column 40
 * before the "sync:", "dead:" and "pref=" annotations.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* characters written so far on this line */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Opcode name, decorated with the operand type where flagged. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /*
             * K walks op->args[]; from the constant-argument switches
             * onward I counts the constant arguments already printed.
             */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* First constant argument, printed symbolically if possible. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Condition code: by name when known, else as hex. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    /* Whatever remains is not covered by the three tables. */
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Next constant argument: branch label or memory barrier. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access pairs: r = load, w = store. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant arguments are printed as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad so that the annotations below line up at column 40. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Two SYNC_ARG bits, starting at SYNC_ARG. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, " sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Drop everything below DEAD_ARG; one bit per argument remains. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, " dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, " pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    /* set != 0 here, so exactly one register is preferred. */
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3142 */ 3143 switch (arg_ct->pair) { 3144 case 1: 3145 case 3: 3146 return (k + 1) * 2; 3147 case 2: 3148 return (arg_ct->pair_index + 1) * 2 - 1; 3149 } 3150 3151 /* Finally, sort by decreasing register count. */ 3152 assert(n > 1); 3153 return -n; 3154 } 3155 3156 /* sort from highest priority to lowest */ 3157 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3158 { 3159 int i, j; 3160 3161 for (i = 0; i < n; i++) { 3162 a[start + i].sort_index = start + i; 3163 } 3164 if (n <= 1) { 3165 return; 3166 } 3167 for (i = 0; i < n - 1; i++) { 3168 for (j = i + 1; j < n; j++) { 3169 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3170 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3171 if (p1 < p2) { 3172 int tmp = a[start + i].sort_index; 3173 a[start + i].sort_index = a[start + j].sort_index; 3174 a[start + j].sort_index = tmp; 3175 } 3176 } 3177 } 3178 } 3179 3180 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3181 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3182 3183 static void process_constraint_sets(void) 3184 { 3185 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3186 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3187 TCGArgConstraint *args_ct = all_cts[c]; 3188 int nb_oargs = tdefs->nb_oargs; 3189 int nb_iargs = tdefs->nb_iargs; 3190 int nb_args = nb_oargs + nb_iargs; 3191 bool saw_alias_pair = false; 3192 3193 for (int i = 0; i < nb_args; i++) { 3194 const char *ct_str = tdefs->args_ct_str[i]; 3195 bool input_p = i >= nb_oargs; 3196 int o; 3197 3198 switch (*ct_str) { 3199 case '0' ... '9': 3200 o = *ct_str - '0'; 3201 tcg_debug_assert(input_p); 3202 tcg_debug_assert(o < nb_oargs); 3203 tcg_debug_assert(args_ct[o].regs != 0); 3204 tcg_debug_assert(!args_ct[o].oalias); 3205 args_ct[i] = args_ct[o]; 3206 /* The output sets oalias. */ 3207 args_ct[o].oalias = 1; 3208 args_ct[o].alias_index = i; 3209 /* The input sets ialias. 
/*
 * Parse every constraint string in constraint_sets[] into the matching
 * row of all_cts[], then fix up aliased register pairs and compute the
 * sort order of the output and input constraints.
 *
 * Constraint string syntax handled here:
 *   '0'..'9'  the input aliases the given output (must be alone)
 *   '&'       prefix on an output: sets newreg
 *   'p'       register after the previous argument's (must be alone)
 *   'm'       register before the previous argument's (must be alone)
 *   'i'       TCG_CT_CONST
 *   'z'       TCG_CT_REG_ZERO (only if TCG_REG_ZERO is defined)
 *   others    target-specific letters from tcg-target-con-str.h
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            /* Handle the characters that may only appear first. */
            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate the remaining letters into ct / regs. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* Input aliases the first half of an output pair. */
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* Input aliases the second half of an output pair. */
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
tcg_debug_assert(con_set >= 0); 3396 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3397 3398 /* The constraint arguments must match TCGOpcode arguments. */ 3399 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3400 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3401 3402 return all_cts[con_set]; 3403 } 3404 3405 static void remove_label_use(TCGOp *op, int idx) 3406 { 3407 TCGLabel *label = arg_label(op->args[idx]); 3408 TCGLabelUse *use; 3409 3410 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3411 if (use->op == op) { 3412 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3413 return; 3414 } 3415 } 3416 g_assert_not_reached(); 3417 } 3418 3419 void tcg_op_remove(TCGContext *s, TCGOp *op) 3420 { 3421 switch (op->opc) { 3422 case INDEX_op_br: 3423 remove_label_use(op, 0); 3424 break; 3425 case INDEX_op_brcond_i32: 3426 case INDEX_op_brcond_i64: 3427 remove_label_use(op, 3); 3428 break; 3429 case INDEX_op_brcond2_i32: 3430 remove_label_use(op, 5); 3431 break; 3432 default: 3433 break; 3434 } 3435 3436 QTAILQ_REMOVE(&s->ops, op, link); 3437 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3438 s->nb_ops--; 3439 } 3440 3441 void tcg_remove_ops_after(TCGOp *op) 3442 { 3443 TCGContext *s = tcg_ctx; 3444 3445 while (true) { 3446 TCGOp *last = tcg_last_op(); 3447 if (last == op) { 3448 return; 3449 } 3450 tcg_op_remove(s, last); 3451 } 3452 } 3453 3454 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3455 { 3456 TCGContext *s = tcg_ctx; 3457 TCGOp *op = NULL; 3458 3459 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3460 QTAILQ_FOREACH(op, &s->free_ops, link) { 3461 if (nargs <= op->nargs) { 3462 QTAILQ_REMOVE(&s->free_ops, op, link); 3463 nargs = op->nargs; 3464 goto found; 3465 } 3466 } 3467 } 3468 3469 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
*/ 3470 nargs = MAX(4, nargs); 3471 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3472 3473 found: 3474 memset(op, 0, offsetof(TCGOp, link)); 3475 op->opc = opc; 3476 op->nargs = nargs; 3477 3478 /* Check for bitfield overflow. */ 3479 tcg_debug_assert(op->nargs == nargs); 3480 3481 s->nb_ops++; 3482 return op; 3483 } 3484 3485 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3486 { 3487 TCGOp *op = tcg_op_alloc(opc, nargs); 3488 3489 if (tcg_ctx->emit_before_op) { 3490 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3491 } else { 3492 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3493 } 3494 return op; 3495 } 3496 3497 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3498 TCGOpcode opc, TCGType type, unsigned nargs) 3499 { 3500 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3501 3502 TCGOP_TYPE(new_op) = type; 3503 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3504 return new_op; 3505 } 3506 3507 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3508 TCGOpcode opc, TCGType type, unsigned nargs) 3509 { 3510 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3511 3512 TCGOP_TYPE(new_op) = type; 3513 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3514 return new_op; 3515 } 3516 3517 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3518 { 3519 TCGLabelUse *u; 3520 3521 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3522 TCGOp *op = u->op; 3523 switch (op->opc) { 3524 case INDEX_op_br: 3525 op->args[0] = label_arg(to); 3526 break; 3527 case INDEX_op_brcond_i32: 3528 case INDEX_op_brcond_i64: 3529 op->args[3] = label_arg(to); 3530 break; 3531 case INDEX_op_brcond2_i32: 3532 op->args[5] = label_arg(to); 3533 break; 3534 default: 3535 g_assert_not_reached(); 3536 } 3537 } 3538 3539 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3540 } 3541 3542 /* Reachable analysis : remove unreachable code. 
*/ 3543 static void __attribute__((noinline)) 3544 reachable_code_pass(TCGContext *s) 3545 { 3546 TCGOp *op, *op_next, *op_prev; 3547 bool dead = false; 3548 3549 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3550 bool remove = dead; 3551 TCGLabel *label; 3552 3553 switch (op->opc) { 3554 case INDEX_op_set_label: 3555 label = arg_label(op->args[0]); 3556 3557 /* 3558 * Note that the first op in the TB is always a load, 3559 * so there is always something before a label. 3560 */ 3561 op_prev = QTAILQ_PREV(op, link); 3562 3563 /* 3564 * If we find two sequential labels, move all branches to 3565 * reference the second label and remove the first label. 3566 * Do this before branch to next optimization, so that the 3567 * middle label is out of the way. 3568 */ 3569 if (op_prev->opc == INDEX_op_set_label) { 3570 move_label_uses(label, arg_label(op_prev->args[0])); 3571 tcg_op_remove(s, op_prev); 3572 op_prev = QTAILQ_PREV(op, link); 3573 } 3574 3575 /* 3576 * Optimization can fold conditional branches to unconditional. 3577 * If we find a label which is preceded by an unconditional 3578 * branch to next, remove the branch. We couldn't do this when 3579 * processing the branch because any dead code between the branch 3580 * and label had not yet been removed. 3581 */ 3582 if (op_prev->opc == INDEX_op_br && 3583 label == arg_label(op_prev->args[0])) { 3584 tcg_op_remove(s, op_prev); 3585 /* Fall through means insns become live again. */ 3586 dead = false; 3587 } 3588 3589 if (QSIMPLEQ_EMPTY(&label->branches)) { 3590 /* 3591 * While there is an occasional backward branch, virtually 3592 * all branches generated by the translators are forward. 3593 * Which means that generally we will have already removed 3594 * all references to the label that will be, and there is 3595 * little to be gained by iterating. 3596 */ 3597 remove = true; 3598 } else { 3599 /* Once we see a label, insns become live again. 
*/ 3600 dead = false; 3601 remove = false; 3602 } 3603 break; 3604 3605 case INDEX_op_br: 3606 case INDEX_op_exit_tb: 3607 case INDEX_op_goto_ptr: 3608 /* Unconditional branches; everything following is dead. */ 3609 dead = true; 3610 break; 3611 3612 case INDEX_op_call: 3613 /* Notice noreturn helper calls, raising exceptions. */ 3614 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3615 dead = true; 3616 } 3617 break; 3618 3619 case INDEX_op_insn_start: 3620 /* Never remove -- we need to keep these for unwind. */ 3621 remove = false; 3622 break; 3623 3624 default: 3625 break; 3626 } 3627 3628 if (remove) { 3629 tcg_op_remove(s, op); 3630 } 3631 } 3632 } 3633 3634 #define TS_DEAD 1 3635 #define TS_MEM 2 3636 3637 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3638 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3639 3640 /* For liveness_pass_1, the register preferences for a given temp. */ 3641 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3642 { 3643 return ts->state_ptr; 3644 } 3645 3646 /* For liveness_pass_1, reset the preferences for a given temp to the 3647 * maximal regset for its type. 3648 */ 3649 static inline void la_reset_pref(TCGTemp *ts) 3650 { 3651 *la_temp_pref(ts) 3652 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3653 } 3654 3655 /* liveness analysis: end of function: all temps are dead, and globals 3656 should be in memory. */ 3657 static void la_func_end(TCGContext *s, int ng, int nt) 3658 { 3659 int i; 3660 3661 for (i = 0; i < ng; ++i) { 3662 s->temps[i].state = TS_DEAD | TS_MEM; 3663 la_reset_pref(&s->temps[i]); 3664 } 3665 for (i = ng; i < nt; ++i) { 3666 s->temps[i].state = TS_DEAD; 3667 la_reset_pref(&s->temps[i]); 3668 } 3669 } 3670 3671 /* liveness analysis: end of basic block: all temps are dead, globals 3672 and local temps should be in memory. 
*/ 3673 static void la_bb_end(TCGContext *s, int ng, int nt) 3674 { 3675 int i; 3676 3677 for (i = 0; i < nt; ++i) { 3678 TCGTemp *ts = &s->temps[i]; 3679 int state; 3680 3681 switch (ts->kind) { 3682 case TEMP_FIXED: 3683 case TEMP_GLOBAL: 3684 case TEMP_TB: 3685 state = TS_DEAD | TS_MEM; 3686 break; 3687 case TEMP_EBB: 3688 case TEMP_CONST: 3689 state = TS_DEAD; 3690 break; 3691 default: 3692 g_assert_not_reached(); 3693 } 3694 ts->state = state; 3695 la_reset_pref(ts); 3696 } 3697 } 3698 3699 /* liveness analysis: sync globals back to memory. */ 3700 static void la_global_sync(TCGContext *s, int ng) 3701 { 3702 int i; 3703 3704 for (i = 0; i < ng; ++i) { 3705 int state = s->temps[i].state; 3706 s->temps[i].state = state | TS_MEM; 3707 if (state == TS_DEAD) { 3708 /* If the global was previously dead, reset prefs. */ 3709 la_reset_pref(&s->temps[i]); 3710 } 3711 } 3712 } 3713 3714 /* 3715 * liveness analysis: conditional branch: all temps are dead unless 3716 * explicitly live-across-conditional-branch, globals and local temps 3717 * should be synced. 3718 */ 3719 static void la_bb_sync(TCGContext *s, int ng, int nt) 3720 { 3721 la_global_sync(s, ng); 3722 3723 for (int i = ng; i < nt; ++i) { 3724 TCGTemp *ts = &s->temps[i]; 3725 int state; 3726 3727 switch (ts->kind) { 3728 case TEMP_TB: 3729 state = ts->state; 3730 ts->state = state | TS_MEM; 3731 if (state != TS_DEAD) { 3732 continue; 3733 } 3734 break; 3735 case TEMP_EBB: 3736 case TEMP_CONST: 3737 continue; 3738 default: 3739 g_assert_not_reached(); 3740 } 3741 la_reset_pref(&s->temps[i]); 3742 } 3743 } 3744 3745 /* liveness analysis: sync globals back to memory and kill. */ 3746 static void la_global_kill(TCGContext *s, int ng) 3747 { 3748 int i; 3749 3750 for (i = 0; i < ng; i++) { 3751 s->temps[i].state = TS_DEAD | TS_MEM; 3752 la_reset_pref(&s->temps[i]); 3753 } 3754 } 3755 3756 /* liveness analysis: note live globals crossing calls. 
*/ 3757 static void la_cross_call(TCGContext *s, int nt) 3758 { 3759 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3760 int i; 3761 3762 for (i = 0; i < nt; i++) { 3763 TCGTemp *ts = &s->temps[i]; 3764 if (!(ts->state & TS_DEAD)) { 3765 TCGRegSet *pset = la_temp_pref(ts); 3766 TCGRegSet set = *pset; 3767 3768 set &= mask; 3769 /* If the combination is not possible, restart. */ 3770 if (set == 0) { 3771 set = tcg_target_available_regs[ts->type] & mask; 3772 } 3773 *pset = set; 3774 } 3775 } 3776 } 3777 3778 /* 3779 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3780 * to TEMP_EBB, if possible. 3781 */ 3782 static void __attribute__((noinline)) 3783 liveness_pass_0(TCGContext *s) 3784 { 3785 void * const multiple_ebb = (void *)(uintptr_t)-1; 3786 int nb_temps = s->nb_temps; 3787 TCGOp *op, *ebb; 3788 3789 for (int i = s->nb_globals; i < nb_temps; ++i) { 3790 s->temps[i].state_ptr = NULL; 3791 } 3792 3793 /* 3794 * Represent each EBB by the op at which it begins. In the case of 3795 * the first EBB, this is the first op, otherwise it is a label. 3796 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3797 * within a single EBB, else MULTIPLE_EBB. 
3798 */ 3799 ebb = QTAILQ_FIRST(&s->ops); 3800 QTAILQ_FOREACH(op, &s->ops, link) { 3801 const TCGOpDef *def; 3802 int nb_oargs, nb_iargs; 3803 3804 switch (op->opc) { 3805 case INDEX_op_set_label: 3806 ebb = op; 3807 continue; 3808 case INDEX_op_discard: 3809 continue; 3810 case INDEX_op_call: 3811 nb_oargs = TCGOP_CALLO(op); 3812 nb_iargs = TCGOP_CALLI(op); 3813 break; 3814 default: 3815 def = &tcg_op_defs[op->opc]; 3816 nb_oargs = def->nb_oargs; 3817 nb_iargs = def->nb_iargs; 3818 break; 3819 } 3820 3821 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3822 TCGTemp *ts = arg_temp(op->args[i]); 3823 3824 if (ts->kind != TEMP_TB) { 3825 continue; 3826 } 3827 if (ts->state_ptr == NULL) { 3828 ts->state_ptr = ebb; 3829 } else if (ts->state_ptr != ebb) { 3830 ts->state_ptr = multiple_ebb; 3831 } 3832 } 3833 } 3834 3835 /* 3836 * For TEMP_TB that turned out not to be used beyond one EBB, 3837 * reduce the liveness to TEMP_EBB. 3838 */ 3839 for (int i = s->nb_globals; i < nb_temps; ++i) { 3840 TCGTemp *ts = &s->temps[i]; 3841 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3842 ts->kind = TEMP_EBB; 3843 } 3844 } 3845 } 3846 3847 /* Liveness analysis : update the opc_arg_life array to tell if a 3848 given input arguments is dead. Instructions updating dead 3849 temporaries are removed. */ 3850 static void __attribute__((noinline)) 3851 liveness_pass_1(TCGContext *s) 3852 { 3853 int nb_globals = s->nb_globals; 3854 int nb_temps = s->nb_temps; 3855 TCGOp *op, *op_prev; 3856 TCGRegSet *prefs; 3857 int i; 3858 3859 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3860 for (i = 0; i < nb_temps; ++i) { 3861 s->temps[i].state_ptr = prefs + i; 3862 } 3863 3864 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 3865 la_func_end(s, nb_globals, nb_temps); 3866 3867 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3868 int nb_iargs, nb_oargs; 3869 TCGOpcode opc_new, opc_new2; 3870 TCGLifeData arg_life = 0; 3871 TCGTemp *ts; 3872 TCGOpcode opc = op->opc; 3873 const TCGOpDef *def = &tcg_op_defs[opc]; 3874 const TCGArgConstraint *args_ct; 3875 3876 switch (opc) { 3877 case INDEX_op_call: 3878 { 3879 const TCGHelperInfo *info = tcg_call_info(op); 3880 int call_flags = tcg_call_flags(op); 3881 3882 nb_oargs = TCGOP_CALLO(op); 3883 nb_iargs = TCGOP_CALLI(op); 3884 3885 /* pure functions can be removed if their result is unused */ 3886 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3887 for (i = 0; i < nb_oargs; i++) { 3888 ts = arg_temp(op->args[i]); 3889 if (ts->state != TS_DEAD) { 3890 goto do_not_remove_call; 3891 } 3892 } 3893 goto do_remove; 3894 } 3895 do_not_remove_call: 3896 3897 /* Output args are dead. */ 3898 for (i = 0; i < nb_oargs; i++) { 3899 ts = arg_temp(op->args[i]); 3900 if (ts->state & TS_DEAD) { 3901 arg_life |= DEAD_ARG << i; 3902 } 3903 if (ts->state & TS_MEM) { 3904 arg_life |= SYNC_ARG << i; 3905 } 3906 ts->state = TS_DEAD; 3907 la_reset_pref(ts); 3908 } 3909 3910 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3911 memset(op->output_pref, 0, sizeof(op->output_pref)); 3912 3913 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3914 TCG_CALL_NO_READ_GLOBALS))) { 3915 la_global_kill(s, nb_globals); 3916 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3917 la_global_sync(s, nb_globals); 3918 } 3919 3920 /* Record arguments that die in this helper. */ 3921 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3922 ts = arg_temp(op->args[i]); 3923 if (ts->state & TS_DEAD) { 3924 arg_life |= DEAD_ARG << i; 3925 } 3926 } 3927 3928 /* For all live registers, remove call-clobbered prefs. */ 3929 la_cross_call(s, nb_temps); 3930 3931 /* 3932 * Input arguments are live for preceding opcodes. 
3933 * 3934 * For those arguments that die, and will be allocated in 3935 * registers, clear the register set for that arg, to be 3936 * filled in below. For args that will be on the stack, 3937 * reset to any available reg. Process arguments in reverse 3938 * order so that if a temp is used more than once, the stack 3939 * reset to max happens before the register reset to 0. 3940 */ 3941 for (i = nb_iargs - 1; i >= 0; i--) { 3942 const TCGCallArgumentLoc *loc = &info->in[i]; 3943 ts = arg_temp(op->args[nb_oargs + i]); 3944 3945 if (ts->state & TS_DEAD) { 3946 switch (loc->kind) { 3947 case TCG_CALL_ARG_NORMAL: 3948 case TCG_CALL_ARG_EXTEND_U: 3949 case TCG_CALL_ARG_EXTEND_S: 3950 if (arg_slot_reg_p(loc->arg_slot)) { 3951 *la_temp_pref(ts) = 0; 3952 break; 3953 } 3954 /* fall through */ 3955 default: 3956 *la_temp_pref(ts) = 3957 tcg_target_available_regs[ts->type]; 3958 break; 3959 } 3960 ts->state &= ~TS_DEAD; 3961 } 3962 } 3963 3964 /* 3965 * For each input argument, add its input register to prefs. 3966 * If a temp is used once, this produces a single set bit; 3967 * if a temp is used multiple times, this produces a set. 
3968 */ 3969 for (i = 0; i < nb_iargs; i++) { 3970 const TCGCallArgumentLoc *loc = &info->in[i]; 3971 ts = arg_temp(op->args[nb_oargs + i]); 3972 3973 switch (loc->kind) { 3974 case TCG_CALL_ARG_NORMAL: 3975 case TCG_CALL_ARG_EXTEND_U: 3976 case TCG_CALL_ARG_EXTEND_S: 3977 if (arg_slot_reg_p(loc->arg_slot)) { 3978 tcg_regset_set_reg(*la_temp_pref(ts), 3979 tcg_target_call_iarg_regs[loc->arg_slot]); 3980 } 3981 break; 3982 default: 3983 break; 3984 } 3985 } 3986 } 3987 break; 3988 case INDEX_op_insn_start: 3989 break; 3990 case INDEX_op_discard: 3991 /* mark the temporary as dead */ 3992 ts = arg_temp(op->args[0]); 3993 ts->state = TS_DEAD; 3994 la_reset_pref(ts); 3995 break; 3996 3997 case INDEX_op_add2_i32: 3998 case INDEX_op_add2_i64: 3999 opc_new = INDEX_op_add; 4000 goto do_addsub2; 4001 case INDEX_op_sub2_i32: 4002 case INDEX_op_sub2_i64: 4003 opc_new = INDEX_op_sub; 4004 do_addsub2: 4005 nb_iargs = 4; 4006 nb_oargs = 2; 4007 /* Test if the high part of the operation is dead, but not 4008 the low part. The result can be optimized to a simple 4009 add or sub. This happens often for x86_64 guest when the 4010 cpu mode is set to 32 bit. */ 4011 if (arg_temp(op->args[1])->state == TS_DEAD) { 4012 if (arg_temp(op->args[0])->state == TS_DEAD) { 4013 goto do_remove; 4014 } 4015 /* Replace the opcode and adjust the args in place, 4016 leaving 3 unused args at the end. */ 4017 op->opc = opc = opc_new; 4018 op->args[1] = op->args[2]; 4019 op->args[2] = op->args[4]; 4020 /* Fall through and mark the single-word operation live. 
*/ 4021 nb_iargs = 2; 4022 nb_oargs = 1; 4023 } 4024 goto do_not_remove; 4025 4026 case INDEX_op_muls2_i32: 4027 case INDEX_op_muls2_i64: 4028 opc_new = INDEX_op_mul; 4029 opc_new2 = INDEX_op_mulsh; 4030 goto do_mul2; 4031 case INDEX_op_mulu2_i32: 4032 case INDEX_op_mulu2_i64: 4033 opc_new = INDEX_op_mul; 4034 opc_new2 = INDEX_op_muluh; 4035 do_mul2: 4036 nb_iargs = 2; 4037 nb_oargs = 2; 4038 if (arg_temp(op->args[1])->state == TS_DEAD) { 4039 if (arg_temp(op->args[0])->state == TS_DEAD) { 4040 /* Both parts of the operation are dead. */ 4041 goto do_remove; 4042 } 4043 /* The high part of the operation is dead; generate the low. */ 4044 op->opc = opc = opc_new; 4045 op->args[1] = op->args[2]; 4046 op->args[2] = op->args[3]; 4047 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4048 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4049 /* The low part of the operation is dead; generate the high. */ 4050 op->opc = opc = opc_new2; 4051 op->args[0] = op->args[1]; 4052 op->args[1] = op->args[2]; 4053 op->args[2] = op->args[3]; 4054 } else { 4055 goto do_not_remove; 4056 } 4057 /* Mark the single-word operation live. */ 4058 nb_oargs = 1; 4059 goto do_not_remove; 4060 4061 default: 4062 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4063 nb_iargs = def->nb_iargs; 4064 nb_oargs = def->nb_oargs; 4065 4066 /* Test if the operation can be removed because all 4067 its outputs are dead. We assume that nb_oargs == 0 4068 implies side effects */ 4069 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4070 for (i = 0; i < nb_oargs; i++) { 4071 if (arg_temp(op->args[i])->state != TS_DEAD) { 4072 goto do_not_remove; 4073 } 4074 } 4075 goto do_remove; 4076 } 4077 goto do_not_remove; 4078 4079 do_remove: 4080 tcg_op_remove(s, op); 4081 break; 4082 4083 do_not_remove: 4084 for (i = 0; i < nb_oargs; i++) { 4085 ts = arg_temp(op->args[i]); 4086 4087 /* Remember the preference of the uses that followed. 
*/ 4088 if (i < ARRAY_SIZE(op->output_pref)) { 4089 op->output_pref[i] = *la_temp_pref(ts); 4090 } 4091 4092 /* Output args are dead. */ 4093 if (ts->state & TS_DEAD) { 4094 arg_life |= DEAD_ARG << i; 4095 } 4096 if (ts->state & TS_MEM) { 4097 arg_life |= SYNC_ARG << i; 4098 } 4099 ts->state = TS_DEAD; 4100 la_reset_pref(ts); 4101 } 4102 4103 /* If end of basic block, update. */ 4104 if (def->flags & TCG_OPF_BB_EXIT) { 4105 la_func_end(s, nb_globals, nb_temps); 4106 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4107 la_bb_sync(s, nb_globals, nb_temps); 4108 } else if (def->flags & TCG_OPF_BB_END) { 4109 la_bb_end(s, nb_globals, nb_temps); 4110 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4111 la_global_sync(s, nb_globals); 4112 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4113 la_cross_call(s, nb_temps); 4114 } 4115 } 4116 4117 /* Record arguments that die in this opcode. */ 4118 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4119 ts = arg_temp(op->args[i]); 4120 if (ts->state & TS_DEAD) { 4121 arg_life |= DEAD_ARG << i; 4122 } 4123 } 4124 4125 /* Input arguments are live for preceding opcodes. */ 4126 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4127 ts = arg_temp(op->args[i]); 4128 if (ts->state & TS_DEAD) { 4129 /* For operands that were dead, initially allow 4130 all regs for the type. */ 4131 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4132 ts->state &= ~TS_DEAD; 4133 } 4134 } 4135 4136 /* Incorporate constraints for this operand. */ 4137 switch (opc) { 4138 case INDEX_op_mov: 4139 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4140 have proper constraints. That said, special case 4141 moves to propagate preferences backward. 
*/ 4142 if (IS_DEAD_ARG(1)) { 4143 *la_temp_pref(arg_temp(op->args[0])) 4144 = *la_temp_pref(arg_temp(op->args[1])); 4145 } 4146 break; 4147 4148 default: 4149 args_ct = opcode_args_ct(op); 4150 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4151 const TCGArgConstraint *ct = &args_ct[i]; 4152 TCGRegSet set, *pset; 4153 4154 ts = arg_temp(op->args[i]); 4155 pset = la_temp_pref(ts); 4156 set = *pset; 4157 4158 set &= ct->regs; 4159 if (ct->ialias) { 4160 set &= output_pref(op, ct->alias_index); 4161 } 4162 /* If the combination is not possible, restart. */ 4163 if (set == 0) { 4164 set = ct->regs; 4165 } 4166 *pset = set; 4167 } 4168 break; 4169 } 4170 break; 4171 } 4172 op->life = arg_life; 4173 } 4174 } 4175 4176 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4177 static bool __attribute__((noinline)) 4178 liveness_pass_2(TCGContext *s) 4179 { 4180 int nb_globals = s->nb_globals; 4181 int nb_temps, i; 4182 bool changes = false; 4183 TCGOp *op, *op_next; 4184 4185 /* Create a temporary for each indirect global. */ 4186 for (i = 0; i < nb_globals; ++i) { 4187 TCGTemp *its = &s->temps[i]; 4188 if (its->indirect_reg) { 4189 TCGTemp *dts = tcg_temp_alloc(s); 4190 dts->type = its->type; 4191 dts->base_type = its->base_type; 4192 dts->temp_subindex = its->temp_subindex; 4193 dts->kind = TEMP_EBB; 4194 its->state_ptr = dts; 4195 } else { 4196 its->state_ptr = NULL; 4197 } 4198 /* All globals begin dead. 
*/ 4199 its->state = TS_DEAD; 4200 } 4201 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4202 TCGTemp *its = &s->temps[i]; 4203 its->state_ptr = NULL; 4204 its->state = TS_DEAD; 4205 } 4206 4207 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4208 TCGOpcode opc = op->opc; 4209 const TCGOpDef *def = &tcg_op_defs[opc]; 4210 TCGLifeData arg_life = op->life; 4211 int nb_iargs, nb_oargs, call_flags; 4212 TCGTemp *arg_ts, *dir_ts; 4213 4214 if (opc == INDEX_op_call) { 4215 nb_oargs = TCGOP_CALLO(op); 4216 nb_iargs = TCGOP_CALLI(op); 4217 call_flags = tcg_call_flags(op); 4218 } else { 4219 nb_iargs = def->nb_iargs; 4220 nb_oargs = def->nb_oargs; 4221 4222 /* Set flags similar to how calls require. */ 4223 if (def->flags & TCG_OPF_COND_BRANCH) { 4224 /* Like reading globals: sync_globals */ 4225 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4226 } else if (def->flags & TCG_OPF_BB_END) { 4227 /* Like writing globals: save_globals */ 4228 call_flags = 0; 4229 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4230 /* Like reading globals: sync_globals */ 4231 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4232 } else { 4233 /* No effect on globals. */ 4234 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4235 TCG_CALL_NO_WRITE_GLOBALS); 4236 } 4237 } 4238 4239 /* Make sure that input arguments are available. */ 4240 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4241 arg_ts = arg_temp(op->args[i]); 4242 dir_ts = arg_ts->state_ptr; 4243 if (dir_ts && arg_ts->state == TS_DEAD) { 4244 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4245 ? INDEX_op_ld_i32 4246 : INDEX_op_ld_i64); 4247 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4248 arg_ts->type, 3); 4249 4250 lop->args[0] = temp_arg(dir_ts); 4251 lop->args[1] = temp_arg(arg_ts->mem_base); 4252 lop->args[2] = arg_ts->mem_offset; 4253 4254 /* Loaded, but synced with memory. */ 4255 arg_ts->state = TS_MEM; 4256 } 4257 } 4258 4259 /* Perform input replacement, and mark inputs that became dead. 
4260 No action is required except keeping temp_state up to date 4261 so that we reload when needed. */ 4262 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4263 arg_ts = arg_temp(op->args[i]); 4264 dir_ts = arg_ts->state_ptr; 4265 if (dir_ts) { 4266 op->args[i] = temp_arg(dir_ts); 4267 changes = true; 4268 if (IS_DEAD_ARG(i)) { 4269 arg_ts->state = TS_DEAD; 4270 } 4271 } 4272 } 4273 4274 /* Liveness analysis should ensure that the following are 4275 all correct, for call sites and basic block end points. */ 4276 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4277 /* Nothing to do */ 4278 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4279 for (i = 0; i < nb_globals; ++i) { 4280 /* Liveness should see that globals are synced back, 4281 that is, either TS_DEAD or TS_MEM. */ 4282 arg_ts = &s->temps[i]; 4283 tcg_debug_assert(arg_ts->state_ptr == 0 4284 || arg_ts->state != 0); 4285 } 4286 } else { 4287 for (i = 0; i < nb_globals; ++i) { 4288 /* Liveness should see that globals are saved back, 4289 that is, TS_DEAD, waiting to be reloaded. */ 4290 arg_ts = &s->temps[i]; 4291 tcg_debug_assert(arg_ts->state_ptr == 0 4292 || arg_ts->state == TS_DEAD); 4293 } 4294 } 4295 4296 /* Outputs become available. */ 4297 if (opc == INDEX_op_mov) { 4298 arg_ts = arg_temp(op->args[0]); 4299 dir_ts = arg_ts->state_ptr; 4300 if (dir_ts) { 4301 op->args[0] = temp_arg(dir_ts); 4302 changes = true; 4303 4304 /* The output is now live and modified. */ 4305 arg_ts->state = 0; 4306 4307 if (NEED_SYNC_ARG(0)) { 4308 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4309 ? 
INDEX_op_st_i32 4310 : INDEX_op_st_i64); 4311 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4312 arg_ts->type, 3); 4313 TCGTemp *out_ts = dir_ts; 4314 4315 if (IS_DEAD_ARG(0)) { 4316 out_ts = arg_temp(op->args[1]); 4317 arg_ts->state = TS_DEAD; 4318 tcg_op_remove(s, op); 4319 } else { 4320 arg_ts->state = TS_MEM; 4321 } 4322 4323 sop->args[0] = temp_arg(out_ts); 4324 sop->args[1] = temp_arg(arg_ts->mem_base); 4325 sop->args[2] = arg_ts->mem_offset; 4326 } else { 4327 tcg_debug_assert(!IS_DEAD_ARG(0)); 4328 } 4329 } 4330 } else { 4331 for (i = 0; i < nb_oargs; i++) { 4332 arg_ts = arg_temp(op->args[i]); 4333 dir_ts = arg_ts->state_ptr; 4334 if (!dir_ts) { 4335 continue; 4336 } 4337 op->args[i] = temp_arg(dir_ts); 4338 changes = true; 4339 4340 /* The output is now live and modified. */ 4341 arg_ts->state = 0; 4342 4343 /* Sync outputs upon their last write. */ 4344 if (NEED_SYNC_ARG(i)) { 4345 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4346 ? INDEX_op_st_i32 4347 : INDEX_op_st_i64); 4348 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4349 arg_ts->type, 3); 4350 4351 sop->args[0] = temp_arg(dir_ts); 4352 sop->args[1] = temp_arg(arg_ts->mem_base); 4353 sop->args[2] = arg_ts->mem_offset; 4354 4355 arg_ts->state = TS_MEM; 4356 } 4357 /* Drop outputs that are dead. */ 4358 if (IS_DEAD_ARG(i)) { 4359 arg_ts->state = TS_DEAD; 4360 } 4361 } 4362 } 4363 } 4364 4365 return changes; 4366 } 4367 4368 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4369 { 4370 intptr_t off; 4371 int size, align; 4372 4373 /* When allocating an object, look at the full type. 
*/ 4374 size = tcg_type_size(ts->base_type); 4375 switch (ts->base_type) { 4376 case TCG_TYPE_I32: 4377 align = 4; 4378 break; 4379 case TCG_TYPE_I64: 4380 case TCG_TYPE_V64: 4381 align = 8; 4382 break; 4383 case TCG_TYPE_I128: 4384 case TCG_TYPE_V128: 4385 case TCG_TYPE_V256: 4386 /* 4387 * Note that we do not require aligned storage for V256, 4388 * and that we provide alignment for I128 to match V128, 4389 * even if that's above what the host ABI requires. 4390 */ 4391 align = 16; 4392 break; 4393 default: 4394 g_assert_not_reached(); 4395 } 4396 4397 /* 4398 * Assume the stack is sufficiently aligned. 4399 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4400 * and do not require 16 byte vector alignment. This seems slightly 4401 * easier than fully parameterizing the above switch statement. 4402 */ 4403 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4404 off = ROUND_UP(s->current_frame_offset, align); 4405 4406 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4407 if (off + size > s->frame_end) { 4408 tcg_raise_tb_overflow(s); 4409 } 4410 s->current_frame_offset = off + size; 4411 #if defined(__sparc__) 4412 off += TCG_TARGET_STACK_BIAS; 4413 #endif 4414 4415 /* If the object was subdivided, assign memory to all the parts. */ 4416 if (ts->base_type != ts->type) { 4417 int part_size = tcg_type_size(ts->type); 4418 int part_count = size / part_size; 4419 4420 /* 4421 * Each part is allocated sequentially in tcg_temp_new_internal. 4422 * Jump back to the first part by subtracting the current index. 4423 */ 4424 ts -= ts->temp_subindex; 4425 for (int i = 0; i < part_count; ++i) { 4426 ts[i].mem_offset = off + i * part_size; 4427 ts[i].mem_base = s->frame_temp; 4428 ts[i].mem_allocated = 1; 4429 } 4430 } else { 4431 ts->mem_offset = off; 4432 ts->mem_base = s->frame_temp; 4433 ts->mem_allocated = 1; 4434 } 4435 } 4436 4437 /* Assign @reg to @ts, and update reg_to_temp[]. 
*/ 4438 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4439 { 4440 if (ts->val_type == TEMP_VAL_REG) { 4441 TCGReg old = ts->reg; 4442 tcg_debug_assert(s->reg_to_temp[old] == ts); 4443 if (old == reg) { 4444 return; 4445 } 4446 s->reg_to_temp[old] = NULL; 4447 } 4448 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4449 s->reg_to_temp[reg] = ts; 4450 ts->val_type = TEMP_VAL_REG; 4451 ts->reg = reg; 4452 } 4453 4454 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4455 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4456 { 4457 tcg_debug_assert(type != TEMP_VAL_REG); 4458 if (ts->val_type == TEMP_VAL_REG) { 4459 TCGReg reg = ts->reg; 4460 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4461 s->reg_to_temp[reg] = NULL; 4462 } 4463 ts->val_type = type; 4464 } 4465 4466 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4467 4468 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4469 mark it free; otherwise mark it dead. */ 4470 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4471 { 4472 TCGTempVal new_type; 4473 4474 switch (ts->kind) { 4475 case TEMP_FIXED: 4476 return; 4477 case TEMP_GLOBAL: 4478 case TEMP_TB: 4479 new_type = TEMP_VAL_MEM; 4480 break; 4481 case TEMP_EBB: 4482 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4483 break; 4484 case TEMP_CONST: 4485 new_type = TEMP_VAL_CONST; 4486 break; 4487 default: 4488 g_assert_not_reached(); 4489 } 4490 set_temp_val_nonreg(s, ts, new_type); 4491 } 4492 4493 /* Mark a temporary as dead. */ 4494 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4495 { 4496 temp_free_or_dead(s, ts, 1); 4497 } 4498 4499 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4500 registers needs to be allocated to store a constant. 
If 'free_or_dead' 4501 is non-zero, subsequently release the temporary; if it is positive, the 4502 temp is dead; if it is negative, the temp is free. */ 4503 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4504 TCGRegSet preferred_regs, int free_or_dead) 4505 { 4506 if (!temp_readonly(ts) && !ts->mem_coherent) { 4507 if (!ts->mem_allocated) { 4508 temp_allocate_frame(s, ts); 4509 } 4510 switch (ts->val_type) { 4511 case TEMP_VAL_CONST: 4512 /* If we're going to free the temp immediately, then we won't 4513 require it later in a register, so attempt to store the 4514 constant to memory directly. */ 4515 if (free_or_dead 4516 && tcg_out_sti(s, ts->type, ts->val, 4517 ts->mem_base->reg, ts->mem_offset)) { 4518 break; 4519 } 4520 temp_load(s, ts, tcg_target_available_regs[ts->type], 4521 allocated_regs, preferred_regs); 4522 /* fallthrough */ 4523 4524 case TEMP_VAL_REG: 4525 tcg_out_st(s, ts->type, ts->reg, 4526 ts->mem_base->reg, ts->mem_offset); 4527 break; 4528 4529 case TEMP_VAL_MEM: 4530 break; 4531 4532 case TEMP_VAL_DEAD: 4533 default: 4534 g_assert_not_reached(); 4535 } 4536 ts->mem_coherent = 1; 4537 } 4538 if (free_or_dead) { 4539 temp_free_or_dead(s, ts, free_or_dead); 4540 } 4541 } 4542 4543 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4544 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4545 { 4546 TCGTemp *ts = s->reg_to_temp[reg]; 4547 if (ts != NULL) { 4548 temp_sync(s, ts, allocated_regs, 0, -1); 4549 } 4550 } 4551 4552 /** 4553 * tcg_reg_alloc: 4554 * @required_regs: Set of registers in which we must allocate. 4555 * @allocated_regs: Set of registers which must be avoided. 4556 * @preferred_regs: Set of registers we should prefer. 4557 * @rev: True if we search the registers in "indirect" order. 4558 * 4559 * The allocated register must be in @required_regs & ~@allocated_regs, 4560 * but if we can put it in @preferred_regs we may save a move later. 
4561 */ 4562 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4563 TCGRegSet allocated_regs, 4564 TCGRegSet preferred_regs, bool rev) 4565 { 4566 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4567 TCGRegSet reg_ct[2]; 4568 const int *order; 4569 4570 reg_ct[1] = required_regs & ~allocated_regs; 4571 tcg_debug_assert(reg_ct[1] != 0); 4572 reg_ct[0] = reg_ct[1] & preferred_regs; 4573 4574 /* Skip the preferred_regs option if it cannot be satisfied, 4575 or if the preference made no difference. */ 4576 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4577 4578 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4579 4580 /* Try free registers, preferences first. */ 4581 for (j = f; j < 2; j++) { 4582 TCGRegSet set = reg_ct[j]; 4583 4584 if (tcg_regset_single(set)) { 4585 /* One register in the set. */ 4586 TCGReg reg = tcg_regset_first(set); 4587 if (s->reg_to_temp[reg] == NULL) { 4588 return reg; 4589 } 4590 } else { 4591 for (i = 0; i < n; i++) { 4592 TCGReg reg = order[i]; 4593 if (s->reg_to_temp[reg] == NULL && 4594 tcg_regset_test_reg(set, reg)) { 4595 return reg; 4596 } 4597 } 4598 } 4599 } 4600 4601 /* We must spill something. */ 4602 for (j = f; j < 2; j++) { 4603 TCGRegSet set = reg_ct[j]; 4604 4605 if (tcg_regset_single(set)) { 4606 /* One register in the set. 
*/ 4607 TCGReg reg = tcg_regset_first(set); 4608 tcg_reg_free(s, reg, allocated_regs); 4609 return reg; 4610 } else { 4611 for (i = 0; i < n; i++) { 4612 TCGReg reg = order[i]; 4613 if (tcg_regset_test_reg(set, reg)) { 4614 tcg_reg_free(s, reg, allocated_regs); 4615 return reg; 4616 } 4617 } 4618 } 4619 } 4620 4621 g_assert_not_reached(); 4622 } 4623 4624 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4625 TCGRegSet allocated_regs, 4626 TCGRegSet preferred_regs, bool rev) 4627 { 4628 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4629 TCGRegSet reg_ct[2]; 4630 const int *order; 4631 4632 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4633 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4634 tcg_debug_assert(reg_ct[1] != 0); 4635 reg_ct[0] = reg_ct[1] & preferred_regs; 4636 4637 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4638 4639 /* 4640 * Skip the preferred_regs option if it cannot be satisfied, 4641 * or if the preference made no difference. 4642 */ 4643 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4644 4645 /* 4646 * Minimize the number of flushes by looking for 2 free registers first, 4647 * then a single flush, then two flushes. 4648 */ 4649 for (fmin = 2; fmin >= 0; fmin--) { 4650 for (j = k; j < 2; j++) { 4651 TCGRegSet set = reg_ct[j]; 4652 4653 for (i = 0; i < n; i++) { 4654 TCGReg reg = order[i]; 4655 4656 if (tcg_regset_test_reg(set, reg)) { 4657 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4658 if (f >= fmin) { 4659 tcg_reg_free(s, reg, allocated_regs); 4660 tcg_reg_free(s, reg + 1, allocated_regs); 4661 return reg; 4662 } 4663 } 4664 } 4665 } 4666 } 4667 g_assert_not_reached(); 4668 } 4669 4670 /* Make sure the temporary is in a register. If needed, allocate the register 4671 from DESIRED while avoiding ALLOCATED. 
*/ 4672 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4673 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4674 { 4675 TCGReg reg; 4676 4677 switch (ts->val_type) { 4678 case TEMP_VAL_REG: 4679 return; 4680 case TEMP_VAL_CONST: 4681 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4682 preferred_regs, ts->indirect_base); 4683 if (ts->type <= TCG_TYPE_I64) { 4684 tcg_out_movi(s, ts->type, reg, ts->val); 4685 } else { 4686 uint64_t val = ts->val; 4687 MemOp vece = MO_64; 4688 4689 /* 4690 * Find the minimal vector element that matches the constant. 4691 * The targets will, in general, have to do this search anyway, 4692 * do this generically. 4693 */ 4694 if (val == dup_const(MO_8, val)) { 4695 vece = MO_8; 4696 } else if (val == dup_const(MO_16, val)) { 4697 vece = MO_16; 4698 } else if (val == dup_const(MO_32, val)) { 4699 vece = MO_32; 4700 } 4701 4702 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4703 } 4704 ts->mem_coherent = 0; 4705 break; 4706 case TEMP_VAL_MEM: 4707 if (!ts->mem_allocated) { 4708 temp_allocate_frame(s, ts); 4709 } 4710 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4711 preferred_regs, ts->indirect_base); 4712 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4713 ts->mem_coherent = 1; 4714 break; 4715 case TEMP_VAL_DEAD: 4716 default: 4717 g_assert_not_reached(); 4718 } 4719 set_temp_val_reg(s, ts, reg); 4720 } 4721 4722 /* Save a temporary to memory. 'allocated_regs' is used in case a 4723 temporary registers needs to be allocated to store a constant. */ 4724 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4725 { 4726 /* The liveness analysis already ensures that globals are back 4727 in memory. Keep an tcg_debug_assert for safety. */ 4728 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4729 } 4730 4731 /* save globals to their canonical location and assume they can be 4732 modified be the following code. 
'allocated_regs' is used in case a 4733 temporary registers needs to be allocated to store a constant. */ 4734 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4735 { 4736 int i, n; 4737 4738 for (i = 0, n = s->nb_globals; i < n; i++) { 4739 temp_save(s, &s->temps[i], allocated_regs); 4740 } 4741 } 4742 4743 /* sync globals to their canonical location and assume they can be 4744 read by the following code. 'allocated_regs' is used in case a 4745 temporary registers needs to be allocated to store a constant. */ 4746 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4747 { 4748 int i, n; 4749 4750 for (i = 0, n = s->nb_globals; i < n; i++) { 4751 TCGTemp *ts = &s->temps[i]; 4752 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4753 || ts->kind == TEMP_FIXED 4754 || ts->mem_coherent); 4755 } 4756 } 4757 4758 /* at the end of a basic block, we assume all temporaries are dead and 4759 all globals are stored at their canonical location. */ 4760 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4761 { 4762 int i; 4763 4764 for (i = s->nb_globals; i < s->nb_temps; i++) { 4765 TCGTemp *ts = &s->temps[i]; 4766 4767 switch (ts->kind) { 4768 case TEMP_TB: 4769 temp_save(s, ts, allocated_regs); 4770 break; 4771 case TEMP_EBB: 4772 /* The liveness analysis already ensures that temps are dead. 4773 Keep an tcg_debug_assert for safety. */ 4774 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4775 break; 4776 case TEMP_CONST: 4777 /* Similarly, we should have freed any allocated register. */ 4778 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4779 break; 4780 default: 4781 g_assert_not_reached(); 4782 } 4783 } 4784 4785 save_globals(s, allocated_regs); 4786 } 4787 4788 /* 4789 * At a conditional branch, we assume all temporaries are dead unless 4790 * explicitly live-across-conditional-branch; all globals and local 4791 * temps are synced to their location. 
4792 */ 4793 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4794 { 4795 sync_globals(s, allocated_regs); 4796 4797 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4798 TCGTemp *ts = &s->temps[i]; 4799 /* 4800 * The liveness analysis already ensures that temps are dead. 4801 * Keep tcg_debug_asserts for safety. 4802 */ 4803 switch (ts->kind) { 4804 case TEMP_TB: 4805 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4806 break; 4807 case TEMP_EBB: 4808 case TEMP_CONST: 4809 break; 4810 default: 4811 g_assert_not_reached(); 4812 } 4813 } 4814 } 4815 4816 /* 4817 * Specialized code generation for INDEX_op_mov_* with a constant. 4818 */ 4819 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4820 tcg_target_ulong val, TCGLifeData arg_life, 4821 TCGRegSet preferred_regs) 4822 { 4823 /* ENV should not be modified. */ 4824 tcg_debug_assert(!temp_readonly(ots)); 4825 4826 /* The movi is not explicitly generated here. */ 4827 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4828 ots->val = val; 4829 ots->mem_coherent = 0; 4830 if (NEED_SYNC_ARG(0)) { 4831 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4832 } else if (IS_DEAD_ARG(0)) { 4833 temp_dead(s, ots); 4834 } 4835 } 4836 4837 /* 4838 * Specialized code generation for INDEX_op_mov_*. 4839 */ 4840 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4841 { 4842 const TCGLifeData arg_life = op->life; 4843 TCGRegSet allocated_regs, preferred_regs; 4844 TCGTemp *ts, *ots; 4845 TCGType otype, itype; 4846 TCGReg oreg, ireg; 4847 4848 allocated_regs = s->reserved_regs; 4849 preferred_regs = output_pref(op, 0); 4850 ots = arg_temp(op->args[0]); 4851 ts = arg_temp(op->args[1]); 4852 4853 /* ENV should not be modified. */ 4854 tcg_debug_assert(!temp_readonly(ots)); 4855 4856 /* Note that otype != itype for no-op truncation. 
*/ 4857 otype = ots->type; 4858 itype = ts->type; 4859 4860 if (ts->val_type == TEMP_VAL_CONST) { 4861 /* propagate constant or generate sti */ 4862 tcg_target_ulong val = ts->val; 4863 if (IS_DEAD_ARG(1)) { 4864 temp_dead(s, ts); 4865 } 4866 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4867 return; 4868 } 4869 4870 /* If the source value is in memory we're going to be forced 4871 to have it in a register in order to perform the copy. Copy 4872 the SOURCE value into its own register first, that way we 4873 don't have to reload SOURCE the next time it is used. */ 4874 if (ts->val_type == TEMP_VAL_MEM) { 4875 temp_load(s, ts, tcg_target_available_regs[itype], 4876 allocated_regs, preferred_regs); 4877 } 4878 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4879 ireg = ts->reg; 4880 4881 if (IS_DEAD_ARG(0)) { 4882 /* mov to a non-saved dead register makes no sense (even with 4883 liveness analysis disabled). */ 4884 tcg_debug_assert(NEED_SYNC_ARG(0)); 4885 if (!ots->mem_allocated) { 4886 temp_allocate_frame(s, ots); 4887 } 4888 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4889 if (IS_DEAD_ARG(1)) { 4890 temp_dead(s, ts); 4891 } 4892 temp_dead(s, ots); 4893 return; 4894 } 4895 4896 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4897 /* 4898 * The mov can be suppressed. Kill input first, so that it 4899 * is unlinked from reg_to_temp, then set the output to the 4900 * reg that we saved from the input. 4901 */ 4902 temp_dead(s, ts); 4903 oreg = ireg; 4904 } else { 4905 if (ots->val_type == TEMP_VAL_REG) { 4906 oreg = ots->reg; 4907 } else { 4908 /* Make sure to not spill the input register during allocation. */ 4909 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4910 allocated_regs | ((TCGRegSet)1 << ireg), 4911 preferred_regs, ots->indirect_base); 4912 } 4913 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4914 /* 4915 * Cross register class move not supported. 
4916 * Store the source register into the destination slot 4917 * and leave the destination temp as TEMP_VAL_MEM. 4918 */ 4919 assert(!temp_readonly(ots)); 4920 if (!ts->mem_allocated) { 4921 temp_allocate_frame(s, ots); 4922 } 4923 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4924 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4925 ots->mem_coherent = 1; 4926 return; 4927 } 4928 } 4929 set_temp_val_reg(s, ots, oreg); 4930 ots->mem_coherent = 0; 4931 4932 if (NEED_SYNC_ARG(0)) { 4933 temp_sync(s, ots, allocated_regs, 0, 0); 4934 } 4935 } 4936 4937 /* 4938 * Specialized code generation for INDEX_op_dup_vec. 4939 */ 4940 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4941 { 4942 const TCGLifeData arg_life = op->life; 4943 TCGRegSet dup_out_regs, dup_in_regs; 4944 const TCGArgConstraint *dup_args_ct; 4945 TCGTemp *its, *ots; 4946 TCGType itype, vtype; 4947 unsigned vece; 4948 int lowpart_ofs; 4949 bool ok; 4950 4951 ots = arg_temp(op->args[0]); 4952 its = arg_temp(op->args[1]); 4953 4954 /* ENV should not be modified. */ 4955 tcg_debug_assert(!temp_readonly(ots)); 4956 4957 itype = its->type; 4958 vece = TCGOP_VECE(op); 4959 vtype = TCGOP_TYPE(op); 4960 4961 if (its->val_type == TEMP_VAL_CONST) { 4962 /* Propagate constant via movi -> dupi. */ 4963 tcg_target_ulong val = its->val; 4964 if (IS_DEAD_ARG(1)) { 4965 temp_dead(s, its); 4966 } 4967 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4968 return; 4969 } 4970 4971 dup_args_ct = opcode_args_ct(op); 4972 dup_out_regs = dup_args_ct[0].regs; 4973 dup_in_regs = dup_args_ct[1].regs; 4974 4975 /* Allocate the output register now. */ 4976 if (ots->val_type != TEMP_VAL_REG) { 4977 TCGRegSet allocated_regs = s->reserved_regs; 4978 TCGReg oreg; 4979 4980 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4981 /* Make sure to not spill the input register. 
*/ 4982 tcg_regset_set_reg(allocated_regs, its->reg); 4983 } 4984 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4985 output_pref(op, 0), ots->indirect_base); 4986 set_temp_val_reg(s, ots, oreg); 4987 } 4988 4989 switch (its->val_type) { 4990 case TEMP_VAL_REG: 4991 /* 4992 * The dup constriaints must be broad, covering all possible VECE. 4993 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 4994 * to fail, indicating that extra moves are required for that case. 4995 */ 4996 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4997 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4998 goto done; 4999 } 5000 /* Try again from memory or a vector input register. */ 5001 } 5002 if (!its->mem_coherent) { 5003 /* 5004 * The input register is not synced, and so an extra store 5005 * would be required to use memory. Attempt an integer-vector 5006 * register move first. We do not have a TCGRegSet for this. 5007 */ 5008 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 5009 break; 5010 } 5011 /* Sync the temp back to its slot and load from there. */ 5012 temp_sync(s, its, s->reserved_regs, 0, 0); 5013 } 5014 /* fall through */ 5015 5016 case TEMP_VAL_MEM: 5017 lowpart_ofs = 0; 5018 if (HOST_BIG_ENDIAN) { 5019 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5020 } 5021 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5022 its->mem_offset + lowpart_ofs)) { 5023 goto done; 5024 } 5025 /* Load the input into the destination vector register. */ 5026 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5027 break; 5028 5029 default: 5030 g_assert_not_reached(); 5031 } 5032 5033 /* We now have a vector input register, so dup must succeed. 
*/ 5034 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5035 tcg_debug_assert(ok); 5036 5037 done: 5038 ots->mem_coherent = 0; 5039 if (IS_DEAD_ARG(1)) { 5040 temp_dead(s, its); 5041 } 5042 if (NEED_SYNC_ARG(0)) { 5043 temp_sync(s, ots, s->reserved_regs, 0, 0); 5044 } 5045 if (IS_DEAD_ARG(0)) { 5046 temp_dead(s, ots); 5047 } 5048 } 5049 /* Register allocation and code emission for one generic op. In order: copy the constant args, pick the condition code carried by the opcode (if any), satisfy the input constraints, release dead inputs, handle conditional-branch / basic-block-end / call-clobber / side-effect requirements, satisfy the output constraints, emit the instruction, then sync or kill each output as the liveness data requests. */ 5050 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5051 { 5052 const TCGLifeData arg_life = op->life; 5053 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5054 TCGRegSet i_allocated_regs; 5055 TCGRegSet o_allocated_regs; 5056 int i, k, nb_iargs, nb_oargs; 5057 TCGReg reg; 5058 TCGArg arg; 5059 const TCGArgConstraint *args_ct; 5060 const TCGArgConstraint *arg_ct; 5061 TCGTemp *ts; 5062 TCGArg new_args[TCG_MAX_OP_ARGS]; 5063 int const_args[TCG_MAX_OP_ARGS]; 5064 TCGCond op_cond; 5065 5066 nb_oargs = def->nb_oargs; 5067 nb_iargs = def->nb_iargs; 5068 5069 /* copy constants */ 5070 memcpy(new_args + nb_oargs + nb_iargs, 5071 op->args + nb_oargs + nb_iargs, 5072 sizeof(TCGArg) * def->nb_cargs); 5073 5074 i_allocated_regs = s->reserved_regs; 5075 o_allocated_regs = s->reserved_regs; 5076 5077 switch (op->opc) { 5078 case INDEX_op_brcond_i32: 5079 case INDEX_op_brcond_i64: 5080 op_cond = op->args[2]; 5081 break; 5082 case INDEX_op_setcond_i32: 5083 case INDEX_op_setcond_i64: 5084 case INDEX_op_negsetcond_i32: 5085 case INDEX_op_negsetcond_i64: 5086 case INDEX_op_cmp_vec: 5087 op_cond = op->args[3]; 5088 break; 5089 case INDEX_op_brcond2_i32: 5090 op_cond = op->args[4]; 5091 break; 5092 case INDEX_op_movcond_i32: 5093 case INDEX_op_movcond_i64: 5094 case INDEX_op_setcond2_i32: 5095 case INDEX_op_cmpsel_vec: 5096 op_cond = op->args[5]; 5097 break; 5098 default: 5099 /* No condition within opcode. 
*/ 5100 op_cond = TCG_COND_ALWAYS; 5101 break; 5102 } 5103 5104 args_ct = opcode_args_ct(op); 5105 5106 /* satisfy input constraints */ 5107 for (k = 0; k < nb_iargs; k++) { 5108 TCGRegSet i_preferred_regs, i_required_regs; 5109 bool allocate_new_reg, copyto_new_reg; 5110 TCGTemp *ts2; 5111 int i1, i2; 5112 5113 i = args_ct[nb_oargs + k].sort_index; 5114 arg = op->args[i]; 5115 arg_ct = &args_ct[i]; 5116 ts = arg_temp(arg); 5117 5118 if (ts->val_type == TEMP_VAL_CONST) { 5119 #ifdef TCG_REG_ZERO 5120 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5121 /* Hardware zero register: indicate register via non-const. */ 5122 const_args[i] = 0; 5123 new_args[i] = TCG_REG_ZERO; 5124 continue; 5125 } 5126 #endif 5127 5128 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5129 op_cond, TCGOP_VECE(op))) { 5130 /* constant is OK for instruction */ 5131 const_args[i] = 1; 5132 new_args[i] = ts->val; 5133 continue; 5134 } 5135 } 5136 5137 reg = ts->reg; 5138 i_preferred_regs = 0; 5139 i_required_regs = arg_ct->regs; 5140 allocate_new_reg = false; 5141 copyto_new_reg = false; 5142 5143 switch (arg_ct->pair) { 5144 case 0: /* not paired */ 5145 if (arg_ct->ialias) { 5146 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5147 5148 /* 5149 * If the input is readonly, then it cannot also be an 5150 * output and aliased to itself. If the input is not 5151 * dead after the instruction, we must allocate a new 5152 * register and move it. 5153 */ 5154 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5155 || args_ct[arg_ct->alias_index].newreg) { 5156 allocate_new_reg = true; 5157 } else if (ts->val_type == TEMP_VAL_REG) { 5158 /* 5159 * Check if the current register has already been 5160 * allocated for another input. 
5161 */ 5162 allocate_new_reg = 5163 tcg_regset_test_reg(i_allocated_regs, reg); 5164 } 5165 } 5166 if (!allocate_new_reg) { 5167 temp_load(s, ts, i_required_regs, i_allocated_regs, 5168 i_preferred_regs); 5169 reg = ts->reg; 5170 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5171 } 5172 if (allocate_new_reg) { 5173 /* 5174 * Allocate a new register matching the constraint 5175 * and move the temporary register into it. 5176 */ 5177 temp_load(s, ts, tcg_target_available_regs[ts->type], 5178 i_allocated_regs, 0); 5179 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5180 i_preferred_regs, ts->indirect_base); 5181 copyto_new_reg = true; 5182 } 5183 break; 5184 5185 case 1: 5186 /* First of an input pair; if i1 == i2, the second is an output. */ 5187 i1 = i; 5188 i2 = arg_ct->pair_index; 5189 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5190 5191 /* 5192 * It is easier to default to allocating a new pair 5193 * and to identify a few cases where it's not required. 5194 */ 5195 if (arg_ct->ialias) { 5196 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5197 if (IS_DEAD_ARG(i1) && 5198 IS_DEAD_ARG(i2) && 5199 !temp_readonly(ts) && 5200 ts->val_type == TEMP_VAL_REG && 5201 ts->reg < TCG_TARGET_NB_REGS - 1 && 5202 tcg_regset_test_reg(i_required_regs, reg) && 5203 !tcg_regset_test_reg(i_allocated_regs, reg) && 5204 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5205 (ts2 5206 ? ts2->val_type == TEMP_VAL_REG && 5207 ts2->reg == reg + 1 && 5208 !temp_readonly(ts2) 5209 : s->reg_to_temp[reg + 1] == NULL)) { 5210 break; 5211 } 5212 } else { 5213 /* Without aliasing, the pair must also be an input. 
*/ 5214 tcg_debug_assert(ts2); 5215 if (ts->val_type == TEMP_VAL_REG && 5216 ts2->val_type == TEMP_VAL_REG && 5217 ts2->reg == reg + 1 && 5218 tcg_regset_test_reg(i_required_regs, reg)) { 5219 break; 5220 } 5221 } 5222 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5223 0, ts->indirect_base); 5224 goto do_pair; 5225 5226 case 2: /* pair second */ 5227 reg = new_args[arg_ct->pair_index] + 1; 5228 goto do_pair; 5229 5230 case 3: /* ialias with second output, no first input */ 5231 tcg_debug_assert(arg_ct->ialias); 5232 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5233 5234 if (IS_DEAD_ARG(i) && 5235 !temp_readonly(ts) && 5236 ts->val_type == TEMP_VAL_REG && 5237 reg > 0 && 5238 s->reg_to_temp[reg - 1] == NULL && 5239 tcg_regset_test_reg(i_required_regs, reg) && 5240 !tcg_regset_test_reg(i_allocated_regs, reg) && 5241 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5242 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5243 break; 5244 } 5245 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5246 i_allocated_regs, 0, 5247 ts->indirect_base); 5248 tcg_regset_set_reg(i_allocated_regs, reg); 5249 reg += 1; 5250 goto do_pair; 5251 5252 do_pair: 5253 /* 5254 * If an aliased input is not dead after the instruction, 5255 * we must allocate a new register and move it. 5256 */ 5257 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5258 TCGRegSet t_allocated_regs = i_allocated_regs; 5259 5260 /* 5261 * Because of the alias, and the continued life, make sure 5262 * that the temp is somewhere *other* than the reg pair, 5263 * and we get a copy in reg. 5264 */ 5265 tcg_regset_set_reg(t_allocated_regs, reg); 5266 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5267 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5268 /* If ts was already in reg, copy it somewhere else. 
*/ 5269 TCGReg nr; 5270 bool ok; 5271 5272 tcg_debug_assert(ts->kind != TEMP_FIXED); 5273 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5274 t_allocated_regs, 0, ts->indirect_base); 5275 ok = tcg_out_mov(s, ts->type, nr, reg); 5276 tcg_debug_assert(ok); 5277 5278 set_temp_val_reg(s, ts, nr); 5279 } else { 5280 temp_load(s, ts, tcg_target_available_regs[ts->type], 5281 t_allocated_regs, 0); 5282 copyto_new_reg = true; 5283 } 5284 } else { 5285 /* Preferably allocate to reg, otherwise copy. */ 5286 i_required_regs = (TCGRegSet)1 << reg; 5287 temp_load(s, ts, i_required_regs, i_allocated_regs, 5288 i_preferred_regs); 5289 copyto_new_reg = ts->reg != reg; 5290 } 5291 break; 5292 5293 default: 5294 g_assert_not_reached(); 5295 } 5296 5297 if (copyto_new_reg) { 5298 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5299 /* 5300 * Cross register class move not supported. Sync the 5301 * temp back to its slot and load from there. 5302 */ 5303 temp_sync(s, ts, i_allocated_regs, 0, 0); 5304 tcg_out_ld(s, ts->type, reg, 5305 ts->mem_base->reg, ts->mem_offset); 5306 } 5307 } 5308 new_args[i] = reg; 5309 const_args[i] = 0; 5310 tcg_regset_set_reg(i_allocated_regs, reg); 5311 } 5312 5313 /* mark dead temporaries and free the associated registers */ 5314 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5315 if (IS_DEAD_ARG(i)) { 5316 temp_dead(s, arg_temp(op->args[i])); 5317 } 5318 } 5319 5320 if (def->flags & TCG_OPF_COND_BRANCH) { 5321 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5322 } else if (def->flags & TCG_OPF_BB_END) { 5323 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5324 } else { 5325 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5326 /* XXX: permit generic clobber register list ? 
*/ 5327 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5328 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5329 tcg_reg_free(s, i, i_allocated_regs); 5330 } 5331 } 5332 } 5333 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5334 /* sync globals if the op has side effects and might trigger 5335 an exception. */ 5336 sync_globals(s, i_allocated_regs); 5337 } 5338 5339 /* satisfy the output constraints */ 5340 for (k = 0; k < nb_oargs; k++) { 5341 i = args_ct[k].sort_index; 5342 arg = op->args[i]; 5343 arg_ct = &args_ct[i]; 5344 ts = arg_temp(arg); 5345 5346 /* ENV should not be modified. */ 5347 tcg_debug_assert(!temp_readonly(ts)); 5348 5349 switch (arg_ct->pair) { 5350 case 0: /* not paired */ 5351 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5352 reg = new_args[arg_ct->alias_index]; 5353 } else if (arg_ct->newreg) { 5354 reg = tcg_reg_alloc(s, arg_ct->regs, 5355 i_allocated_regs | o_allocated_regs, 5356 output_pref(op, k), ts->indirect_base); 5357 } else { 5358 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5359 output_pref(op, k), ts->indirect_base); 5360 } 5361 break; 5362 5363 case 1: /* first of pair */ 5364 if (arg_ct->oalias) { 5365 reg = new_args[arg_ct->alias_index]; 5366 } else if (arg_ct->newreg) { 5367 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5368 i_allocated_regs | o_allocated_regs, 5369 output_pref(op, k), 5370 ts->indirect_base); 5371 } else { 5372 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5373 output_pref(op, k), 5374 ts->indirect_base); 5375 } 5376 break; 5377 5378 case 2: /* second of pair */ 5379 if (arg_ct->oalias) { 5380 reg = new_args[arg_ct->alias_index]; 5381 } else { 5382 reg = new_args[arg_ct->pair_index] + 1; 5383 } 5384 break; 5385 5386 case 3: /* first of pair, aliasing with a second input */ 5387 tcg_debug_assert(!arg_ct->newreg); 5388 reg = new_args[arg_ct->pair_index] - 1; 5389 break; 5390 5391 default: 5392 g_assert_not_reached(); 5393 } 5394 tcg_regset_set_reg(o_allocated_regs, reg); 5395 
set_temp_val_reg(s, ts, reg); 5396 ts->mem_coherent = 0; 5397 new_args[i] = reg; 5398 } 5399 } 5400 5401 /* emit instruction */ 5402 TCGType type = TCGOP_TYPE(op); 5403 switch (op->opc) { 5404 case INDEX_op_ext_i32_i64: 5405 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5406 break; 5407 case INDEX_op_extu_i32_i64: 5408 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5409 break; 5410 case INDEX_op_extrl_i64_i32: 5411 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5412 break; 5413 5414 case INDEX_op_add: 5415 case INDEX_op_and: 5416 case INDEX_op_andc: 5417 case INDEX_op_divs: 5418 case INDEX_op_divu: 5419 case INDEX_op_eqv: 5420 case INDEX_op_mul: 5421 case INDEX_op_mulsh: 5422 case INDEX_op_muluh: 5423 case INDEX_op_nand: 5424 case INDEX_op_nor: 5425 case INDEX_op_or: 5426 case INDEX_op_orc: 5427 case INDEX_op_rems: 5428 case INDEX_op_xor: 5429 { 5430 const TCGOutOpBinary *out = 5431 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5432 5433 /* Constants should never appear in the first source operand. */ 5434 tcg_debug_assert(!const_args[1]); 5435 if (const_args[2]) { 5436 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5437 } else { 5438 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5439 } 5440 } 5441 break; 5442 5443 case INDEX_op_sub: 5444 { 5445 const TCGOutOpSubtract *out = &outop_sub; 5446 5447 /* 5448 * Constants should never appear in the second source operand. 5449 * These are folded to add with negative constant. 5450 */ 5451 tcg_debug_assert(!const_args[2]); 5452 if (const_args[1]) { 5453 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5454 } else { 5455 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5456 } 5457 } 5458 break; 5459 5460 case INDEX_op_neg: 5461 case INDEX_op_not: 5462 { 5463 const TCGOutOpUnary *out = 5464 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5465 5466 /* Constants should have been folded. 
*/ 5467 tcg_debug_assert(!const_args[1]); 5468 out->out_rr(s, type, new_args[0], new_args[1]); 5469 } 5470 break; 5471 5472 case INDEX_op_divs2: 5473 case INDEX_op_divu2: 5474 { 5475 const TCGOutOpDivRem *out = 5476 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5477 5478 /* Only used by x86 and s390x, which use matching constraints. */ 5479 tcg_debug_assert(new_args[0] == new_args[2]); 5480 tcg_debug_assert(new_args[1] == new_args[3]); 5481 tcg_debug_assert(!const_args[4]); 5482 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5483 } 5484 break; 5485 5486 default: 5487 if (def->flags & TCG_OPF_VECTOR) { 5488 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5489 TCGOP_VECE(op), new_args, const_args); 5490 } else { 5491 tcg_out_op(s, op->opc, type, new_args, const_args); 5492 } 5493 break; 5494 } 5495 5496 /* move the outputs in the correct register if needed */ 5497 for(i = 0; i < nb_oargs; i++) { 5498 ts = arg_temp(op->args[i]); 5499 5500 /* ENV should not be modified. */ 5501 tcg_debug_assert(!temp_readonly(ts)); 5502 5503 if (NEED_SYNC_ARG(i)) { 5504 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5505 } else if (IS_DEAD_ARG(i)) { 5506 temp_dead(s, ts); 5507 } 5508 } 5509 } 5510 /* Specialized handling of a dup2 op (asserted to be for 32-bit hosts and 64-bit elements): args[0] is the output, args[1]/args[2] the low/high inputs. Returns true when the op was fully emitted here, false when the generic expansion must be used instead. */ 5511 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5512 { 5513 const TCGLifeData arg_life = op->life; 5514 TCGTemp *ots, *itsl, *itsh; 5515 TCGType vtype = TCGOP_TYPE(op); 5516 5517 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. 
*/ 5518 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5519 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5520 5521 ots = arg_temp(op->args[0]); 5522 itsl = arg_temp(op->args[1]); 5523 itsh = arg_temp(op->args[2]); 5524 5525 /* ENV should not be modified. */ 5526 tcg_debug_assert(!temp_readonly(ots)); 5527 5528 /* Allocate the output register now. 
*/ 5529 if (ots->val_type != TEMP_VAL_REG) { 5530 TCGRegSet allocated_regs = s->reserved_regs; 5531 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5532 TCGReg oreg; 5533 5534 /* Make sure to not spill the input registers. */ 5535 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5536 tcg_regset_set_reg(allocated_regs, itsl->reg); 5537 } 5538 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5539 tcg_regset_set_reg(allocated_regs, itsh->reg); 5540 } 5541 5542 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5543 output_pref(op, 0), ots->indirect_base); 5544 set_temp_val_reg(s, ots, oreg); 5545 } 5546 5547 /* Promote dup2 of immediates to dupi_vec. */ 5548 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5549 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5550 MemOp vece = MO_64; 5551 5552 if (val == dup_const(MO_8, val)) { 5553 vece = MO_8; 5554 } else if (val == dup_const(MO_16, val)) { 5555 vece = MO_16; 5556 } else if (val == dup_const(MO_32, val)) { 5557 vece = MO_32; 5558 } 5559 5560 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5561 goto done; 5562 } 5563 5564 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5565 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5566 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5567 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5568 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5569 5570 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5571 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5572 5573 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5574 its->mem_base->reg, its->mem_offset)) { 5575 goto done; 5576 } 5577 } 5578 5579 /* Fall back to generic expansion. 
*/ 5580 return false; 5581 5582 done: 5583 ots->mem_coherent = 0; 5584 if (IS_DEAD_ARG(1)) { 5585 temp_dead(s, itsl); 5586 } 5587 if (IS_DEAD_ARG(2)) { 5588 temp_dead(s, itsh); 5589 } 5590 if (NEED_SYNC_ARG(0)) { 5591 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5592 } else if (IS_DEAD_ARG(0)) { 5593 temp_dead(s, ots); 5594 } 5595 return true; 5596 } 5597 5598 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5599 TCGRegSet allocated_regs) 5600 { 5601 if (ts->val_type == TEMP_VAL_REG) { 5602 if (ts->reg != reg) { 5603 tcg_reg_free(s, reg, allocated_regs); 5604 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5605 /* 5606 * Cross register class move not supported. Sync the 5607 * temp back to its slot and load from there. 5608 */ 5609 temp_sync(s, ts, allocated_regs, 0, 0); 5610 tcg_out_ld(s, ts->type, reg, 5611 ts->mem_base->reg, ts->mem_offset); 5612 } 5613 } 5614 } else { 5615 TCGRegSet arg_set = 0; 5616 5617 tcg_reg_free(s, reg, allocated_regs); 5618 tcg_regset_set_reg(arg_set, reg); 5619 temp_load(s, ts, arg_set, allocated_regs, 0); 5620 } 5621 } 5622 5623 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5624 TCGRegSet allocated_regs) 5625 { 5626 /* 5627 * When the destination is on the stack, load up the temp and store. 5628 * If there are many call-saved registers, the temp might live to 5629 * see another use; otherwise it'll be discarded. 
/*
 * Register allocation and code emission for a helper call op.
 *
 * The steps are, in order: place the input arguments according to the
 * helper's TCGHelperInfo, release dead inputs, free the call-clobbered
 * registers, save or sync globals as required by the helper flags,
 * emit the call, and finally bind the outputs to wherever the return
 * convention (info->out_kind) left them.
 *
 * NOTE(review): arg_life is not referenced directly below; it is
 * presumably consumed by the IS_DEAD_ARG / NEED_SYNC_ARG macros, which
 * are defined outside this chunk -- confirm before renaming it.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    /* Start from the reserved set; argument registers are added as used. */
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        /* Input temps follow the outputs in op->args[]. */
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Only the store to the reference slot; no address is passed. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers. */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers. */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now. Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        /* The output needs a memory home before its address can be taken. */
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed. */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            /* The value now lives only in the return register. */
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Store the vector return register to the output's home slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5850 * Any host prepared for two operations can implement this with 5851 * half alignment. 5852 */ 5853 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5854 align = MAX(align, half); 5855 } 5856 break; 5857 5858 case MO_ATOM_SUBALIGN: 5859 atmax = size; 5860 if (host_atom != MO_ATOM_SUBALIGN) { 5861 /* If unaligned but not odd, there are subobjects up to half. */ 5862 if (allow_two_ops) { 5863 align = MAX(align, half); 5864 } else { 5865 align = MAX(align, size); 5866 } 5867 } 5868 break; 5869 5870 default: 5871 g_assert_not_reached(); 5872 } 5873 5874 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5875 } 5876 5877 /* 5878 * Similarly for qemu_ld/st slow path helpers. 5879 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5880 * using only the provided backend tcg_out_* functions. 5881 */ 5882 5883 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5884 { 5885 int ofs = arg_slot_stk_ofs(slot); 5886 5887 /* 5888 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5889 * require extension to uint64_t, adjust the address for uint32_t. 5890 */ 5891 if (HOST_BIG_ENDIAN && 5892 TCG_TARGET_REG_BITS == 64 && 5893 type == TCG_TYPE_I32) { 5894 ofs += 4; 5895 } 5896 return ofs; 5897 } 5898 5899 static void tcg_out_helper_load_slots(TCGContext *s, 5900 unsigned nmov, TCGMovExtend *mov, 5901 const TCGLdstHelperParam *parm) 5902 { 5903 unsigned i; 5904 TCGReg dst3; 5905 5906 /* 5907 * Start from the end, storing to the stack first. 5908 * This frees those registers, so we need not consider overlap. 5909 */ 5910 for (i = nmov; i-- > 0; ) { 5911 unsigned slot = mov[i].dst; 5912 5913 if (arg_slot_reg_p(slot)) { 5914 goto found_reg; 5915 } 5916 5917 TCGReg src = mov[i].src; 5918 TCGType dst_type = mov[i].dst_type; 5919 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5920 5921 /* The argument is going onto the stack; extend into scratch. 
*/ 5922 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5923 tcg_debug_assert(parm->ntmp != 0); 5924 mov[i].dst = src = parm->tmp[0]; 5925 tcg_out_movext1(s, &mov[i]); 5926 } 5927 5928 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5929 tcg_out_helper_stk_ofs(dst_type, slot)); 5930 } 5931 return; 5932 5933 found_reg: 5934 /* 5935 * The remaining arguments are in registers. 5936 * Convert slot numbers to argument registers. 5937 */ 5938 nmov = i + 1; 5939 for (i = 0; i < nmov; ++i) { 5940 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5941 } 5942 5943 switch (nmov) { 5944 case 4: 5945 /* The backend must have provided enough temps for the worst case. */ 5946 tcg_debug_assert(parm->ntmp >= 2); 5947 5948 dst3 = mov[3].dst; 5949 for (unsigned j = 0; j < 3; ++j) { 5950 if (dst3 == mov[j].src) { 5951 /* 5952 * Conflict. Copy the source to a temporary, perform the 5953 * remaining moves, then the extension from our scratch 5954 * on the way out. 5955 */ 5956 TCGReg scratch = parm->tmp[1]; 5957 5958 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5959 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5960 tcg_out_movext1_new_src(s, &mov[3], scratch); 5961 break; 5962 } 5963 } 5964 5965 /* No conflicts: perform this move and continue. */ 5966 tcg_out_movext1(s, &mov[3]); 5967 /* fall through */ 5968 5969 case 3: 5970 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5971 parm->ntmp ? parm->tmp[0] : -1); 5972 break; 5973 case 2: 5974 tcg_out_movext2(s, mov, mov + 1, 5975 parm->ntmp ? 
parm->tmp[0] : -1); 5976 break; 5977 case 1: 5978 tcg_out_movext1(s, mov); 5979 break; 5980 default: 5981 g_assert_not_reached(); 5982 } 5983 } 5984 5985 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5986 TCGType type, tcg_target_long imm, 5987 const TCGLdstHelperParam *parm) 5988 { 5989 if (arg_slot_reg_p(slot)) { 5990 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5991 } else { 5992 int ofs = tcg_out_helper_stk_ofs(type, slot); 5993 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5994 tcg_debug_assert(parm->ntmp != 0); 5995 tcg_out_movi(s, type, parm->tmp[0], imm); 5996 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5997 } 5998 } 5999 } 6000 6001 static void tcg_out_helper_load_common_args(TCGContext *s, 6002 const TCGLabelQemuLdst *ldst, 6003 const TCGLdstHelperParam *parm, 6004 const TCGHelperInfo *info, 6005 unsigned next_arg) 6006 { 6007 TCGMovExtend ptr_mov = { 6008 .dst_type = TCG_TYPE_PTR, 6009 .src_type = TCG_TYPE_PTR, 6010 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6011 }; 6012 const TCGCallArgumentLoc *loc = &info->in[0]; 6013 TCGType type; 6014 unsigned slot; 6015 tcg_target_ulong imm; 6016 6017 /* 6018 * Handle env, which is always first. 6019 */ 6020 ptr_mov.dst = loc->arg_slot; 6021 ptr_mov.src = TCG_AREG0; 6022 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6023 6024 /* 6025 * Handle oi. 6026 */ 6027 imm = ldst->oi; 6028 loc = &info->in[next_arg]; 6029 type = TCG_TYPE_I32; 6030 switch (loc->kind) { 6031 case TCG_CALL_ARG_NORMAL: 6032 break; 6033 case TCG_CALL_ARG_EXTEND_U: 6034 case TCG_CALL_ARG_EXTEND_S: 6035 /* No extension required for MemOpIdx. */ 6036 tcg_debug_assert(imm <= INT32_MAX); 6037 type = TCG_TYPE_REG; 6038 break; 6039 default: 6040 g_assert_not_reached(); 6041 } 6042 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6043 next_arg++; 6044 6045 /* 6046 * Handle ra. 
6047 */ 6048 loc = &info->in[next_arg]; 6049 slot = loc->arg_slot; 6050 if (parm->ra_gen) { 6051 int arg_reg = -1; 6052 TCGReg ra_reg; 6053 6054 if (arg_slot_reg_p(slot)) { 6055 arg_reg = tcg_target_call_iarg_regs[slot]; 6056 } 6057 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6058 6059 ptr_mov.dst = slot; 6060 ptr_mov.src = ra_reg; 6061 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6062 } else { 6063 imm = (uintptr_t)ldst->raddr; 6064 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6065 } 6066 } 6067 6068 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6069 const TCGCallArgumentLoc *loc, 6070 TCGType dst_type, TCGType src_type, 6071 TCGReg lo, TCGReg hi) 6072 { 6073 MemOp reg_mo; 6074 6075 if (dst_type <= TCG_TYPE_REG) { 6076 MemOp src_ext; 6077 6078 switch (loc->kind) { 6079 case TCG_CALL_ARG_NORMAL: 6080 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6081 break; 6082 case TCG_CALL_ARG_EXTEND_U: 6083 dst_type = TCG_TYPE_REG; 6084 src_ext = MO_UL; 6085 break; 6086 case TCG_CALL_ARG_EXTEND_S: 6087 dst_type = TCG_TYPE_REG; 6088 src_ext = MO_SL; 6089 break; 6090 default: 6091 g_assert_not_reached(); 6092 } 6093 6094 mov[0].dst = loc->arg_slot; 6095 mov[0].dst_type = dst_type; 6096 mov[0].src = lo; 6097 mov[0].src_type = src_type; 6098 mov[0].src_ext = src_ext; 6099 return 1; 6100 } 6101 6102 if (TCG_TARGET_REG_BITS == 32) { 6103 assert(dst_type == TCG_TYPE_I64); 6104 reg_mo = MO_32; 6105 } else { 6106 assert(dst_type == TCG_TYPE_I128); 6107 reg_mo = MO_64; 6108 } 6109 6110 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6111 mov[0].src = lo; 6112 mov[0].dst_type = TCG_TYPE_REG; 6113 mov[0].src_type = TCG_TYPE_REG; 6114 mov[0].src_ext = reg_mo; 6115 6116 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6117 mov[1].src = hi; 6118 mov[1].dst_type = TCG_TYPE_REG; 6119 mov[1].src_type = TCG_TYPE_REG; 6120 mov[1].src_ext = reg_mo; 6121 6122 return 2; 6123 } 6124 6125 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6126 const 
TCGLdstHelperParam *parm) 6127 { 6128 const TCGHelperInfo *info; 6129 const TCGCallArgumentLoc *loc; 6130 TCGMovExtend mov[2]; 6131 unsigned next_arg, nmov; 6132 MemOp mop = get_memop(ldst->oi); 6133 6134 switch (mop & MO_SIZE) { 6135 case MO_8: 6136 case MO_16: 6137 case MO_32: 6138 info = &info_helper_ld32_mmu; 6139 break; 6140 case MO_64: 6141 info = &info_helper_ld64_mmu; 6142 break; 6143 case MO_128: 6144 info = &info_helper_ld128_mmu; 6145 break; 6146 default: 6147 g_assert_not_reached(); 6148 } 6149 6150 /* Defer env argument. */ 6151 next_arg = 1; 6152 6153 loc = &info->in[next_arg]; 6154 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6155 /* 6156 * 32-bit host with 32-bit guest: zero-extend the guest address 6157 * to 64-bits for the helper by storing the low part, then 6158 * load a zero for the high part. 6159 */ 6160 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6161 TCG_TYPE_I32, TCG_TYPE_I32, 6162 ldst->addr_reg, -1); 6163 tcg_out_helper_load_slots(s, 1, mov, parm); 6164 6165 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6166 TCG_TYPE_I32, 0, parm); 6167 next_arg += 2; 6168 } else { 6169 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6170 ldst->addr_reg, -1); 6171 tcg_out_helper_load_slots(s, nmov, mov, parm); 6172 next_arg += nmov; 6173 } 6174 6175 switch (info->out_kind) { 6176 case TCG_CALL_RET_NORMAL: 6177 case TCG_CALL_RET_BY_VEC: 6178 break; 6179 case TCG_CALL_RET_BY_REF: 6180 /* 6181 * The return reference is in the first argument slot. 6182 * We need memory in which to return: re-use the top of stack. 
6183 */ 6184 { 6185 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6186 6187 if (arg_slot_reg_p(0)) { 6188 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6189 TCG_REG_CALL_STACK, ofs_slot0); 6190 } else { 6191 tcg_debug_assert(parm->ntmp != 0); 6192 tcg_out_addi_ptr(s, parm->tmp[0], 6193 TCG_REG_CALL_STACK, ofs_slot0); 6194 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6195 TCG_REG_CALL_STACK, ofs_slot0); 6196 } 6197 } 6198 break; 6199 default: 6200 g_assert_not_reached(); 6201 } 6202 6203 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6204 } 6205 6206 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6207 bool load_sign, 6208 const TCGLdstHelperParam *parm) 6209 { 6210 MemOp mop = get_memop(ldst->oi); 6211 TCGMovExtend mov[2]; 6212 int ofs_slot0; 6213 6214 switch (ldst->type) { 6215 case TCG_TYPE_I64: 6216 if (TCG_TARGET_REG_BITS == 32) { 6217 break; 6218 } 6219 /* fall through */ 6220 6221 case TCG_TYPE_I32: 6222 mov[0].dst = ldst->datalo_reg; 6223 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6224 mov[0].dst_type = ldst->type; 6225 mov[0].src_type = TCG_TYPE_REG; 6226 6227 /* 6228 * If load_sign, then we allowed the helper to perform the 6229 * appropriate sign extension to tcg_target_ulong, and all 6230 * we need now is a plain move. 6231 * 6232 * If they do not, then we expect the relevant extension 6233 * instruction to be no more expensive than a move, and 6234 * we thus save the icache etc by only using one of two 6235 * helper functions. 
6236 */ 6237 if (load_sign || !(mop & MO_SIGN)) { 6238 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6239 mov[0].src_ext = MO_32; 6240 } else { 6241 mov[0].src_ext = MO_64; 6242 } 6243 } else { 6244 mov[0].src_ext = mop & MO_SSIZE; 6245 } 6246 tcg_out_movext1(s, mov); 6247 return; 6248 6249 case TCG_TYPE_I128: 6250 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6251 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6252 switch (TCG_TARGET_CALL_RET_I128) { 6253 case TCG_CALL_RET_NORMAL: 6254 break; 6255 case TCG_CALL_RET_BY_VEC: 6256 tcg_out_st(s, TCG_TYPE_V128, 6257 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6258 TCG_REG_CALL_STACK, ofs_slot0); 6259 /* fall through */ 6260 case TCG_CALL_RET_BY_REF: 6261 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6262 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6263 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6264 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6265 return; 6266 default: 6267 g_assert_not_reached(); 6268 } 6269 break; 6270 6271 default: 6272 g_assert_not_reached(); 6273 } 6274 6275 mov[0].dst = ldst->datalo_reg; 6276 mov[0].src = 6277 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6278 mov[0].dst_type = TCG_TYPE_REG; 6279 mov[0].src_type = TCG_TYPE_REG; 6280 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6281 6282 mov[1].dst = ldst->datahi_reg; 6283 mov[1].src = 6284 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6285 mov[1].dst_type = TCG_TYPE_REG; 6286 mov[1].src_type = TCG_TYPE_REG; 6287 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6288 6289 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6290 } 6291 6292 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6293 const TCGLdstHelperParam *parm) 6294 { 6295 const TCGHelperInfo *info; 6296 const TCGCallArgumentLoc *loc; 6297 TCGMovExtend mov[4]; 6298 TCGType data_type; 6299 unsigned next_arg, nmov, n; 6300 MemOp mop = get_memop(ldst->oi); 6301 6302 switch (mop & MO_SIZE) { 6303 case MO_8: 6304 case MO_16: 6305 case MO_32: 6306 info = &info_helper_st32_mmu; 6307 data_type = TCG_TYPE_I32; 6308 break; 6309 case MO_64: 6310 info = &info_helper_st64_mmu; 6311 data_type = TCG_TYPE_I64; 6312 break; 6313 case MO_128: 6314 info = &info_helper_st128_mmu; 6315 data_type = TCG_TYPE_I128; 6316 break; 6317 default: 6318 g_assert_not_reached(); 6319 } 6320 6321 /* Defer env argument. */ 6322 next_arg = 1; 6323 nmov = 0; 6324 6325 /* Handle addr argument. */ 6326 loc = &info->in[next_arg]; 6327 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6328 if (TCG_TARGET_REG_BITS == 32) { 6329 /* 6330 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6331 * to 64-bits for the helper by storing the low part. Later, 6332 * after we have processed the register inputs, we will load a 6333 * zero for the high part. 6334 */ 6335 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6336 TCG_TYPE_I32, TCG_TYPE_I32, 6337 ldst->addr_reg, -1); 6338 next_arg += 2; 6339 nmov += 1; 6340 } else { 6341 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6342 ldst->addr_reg, -1); 6343 next_arg += n; 6344 nmov += n; 6345 } 6346 6347 /* Handle data argument. 
*/ 6348 loc = &info->in[next_arg]; 6349 switch (loc->kind) { 6350 case TCG_CALL_ARG_NORMAL: 6351 case TCG_CALL_ARG_EXTEND_U: 6352 case TCG_CALL_ARG_EXTEND_S: 6353 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6354 ldst->datalo_reg, ldst->datahi_reg); 6355 next_arg += n; 6356 nmov += n; 6357 tcg_out_helper_load_slots(s, nmov, mov, parm); 6358 break; 6359 6360 case TCG_CALL_ARG_BY_REF: 6361 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6362 tcg_debug_assert(data_type == TCG_TYPE_I128); 6363 tcg_out_st(s, TCG_TYPE_I64, 6364 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6365 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6366 tcg_out_st(s, TCG_TYPE_I64, 6367 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6368 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6369 6370 tcg_out_helper_load_slots(s, nmov, mov, parm); 6371 6372 if (arg_slot_reg_p(loc->arg_slot)) { 6373 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6374 TCG_REG_CALL_STACK, 6375 arg_slot_stk_ofs(loc->ref_slot)); 6376 } else { 6377 tcg_debug_assert(parm->ntmp != 0); 6378 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6379 arg_slot_stk_ofs(loc->ref_slot)); 6380 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6381 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6382 } 6383 next_arg += 2; 6384 break; 6385 6386 default: 6387 g_assert_not_reached(); 6388 } 6389 6390 if (TCG_TARGET_REG_BITS == 32) { 6391 /* Zero extend the address by loading a zero for the high part. 
/*
 * tcg_gen_code:
 * @s: tcg context holding the queued ops
 * @tb: translation block that receives the generated code
 * @pc_start: guest pc, used here only to filter logging by address range
 *
 * Run the optimizer, reachability and liveness passes over the ops in
 * @s, then walk the op list and emit host code into the buffer at
 * tb->tc.ptr.
 *
 * Returns the size of the generated code on success, or a negative
 * value on failure:
 *   -1  the code pointer passed s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs failed;
 *   any negative result of tcg_out_ldst_finalize or
 *   tcg_out_pool_finalize is passed through unchanged.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management. Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* One row of start_words values is recorded per guest instruction. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Index of the current insn; -1 until the first insn_start op. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous insn by recording where its code ends. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            /* A false return means: use the generic expansion below. */
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow. The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely. Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    /* Close the final insn. */
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
*/ 6605 typedef enum { 6606 JIT_NOACTION = 0, 6607 JIT_REGISTER_FN, 6608 JIT_UNREGISTER_FN 6609 } jit_actions_t; 6610 6611 struct jit_code_entry { 6612 struct jit_code_entry *next_entry; 6613 struct jit_code_entry *prev_entry; 6614 const void *symfile_addr; 6615 uint64_t symfile_size; 6616 }; 6617 6618 struct jit_descriptor { 6619 uint32_t version; 6620 uint32_t action_flag; 6621 struct jit_code_entry *relevant_entry; 6622 struct jit_code_entry *first_entry; 6623 }; 6624 6625 void __jit_debug_register_code(void) __attribute__((noinline)); 6626 void __jit_debug_register_code(void) 6627 { 6628 asm(""); 6629 } 6630 6631 /* Must statically initialize the version, because GDB may check 6632 the version before we can set it. */ 6633 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6634 6635 /* End GDB interface. */ 6636 6637 static int find_string(const char *strtab, const char *str) 6638 { 6639 const char *p = strtab + 1; 6640 6641 while (1) { 6642 if (strcmp(p, str) == 0) { 6643 return p - strtab; 6644 } 6645 p += strlen(p) + 1; 6646 } 6647 } 6648 6649 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6650 const void *debug_frame, 6651 size_t debug_frame_size) 6652 { 6653 struct __attribute__((packed)) DebugInfo { 6654 uint32_t len; 6655 uint16_t version; 6656 uint32_t abbrev; 6657 uint8_t ptr_size; 6658 uint8_t cu_die; 6659 uint16_t cu_lang; 6660 uintptr_t cu_low_pc; 6661 uintptr_t cu_high_pc; 6662 uint8_t fn_die; 6663 char fn_name[16]; 6664 uintptr_t fn_low_pc; 6665 uintptr_t fn_high_pc; 6666 uint8_t cu_eoc; 6667 }; 6668 6669 struct ElfImage { 6670 ElfW(Ehdr) ehdr; 6671 ElfW(Phdr) phdr; 6672 ElfW(Shdr) shdr[7]; 6673 ElfW(Sym) sym[2]; 6674 struct DebugInfo di; 6675 uint8_t da[24]; 6676 char str[80]; 6677 }; 6678 6679 struct ElfImage *img; 6680 6681 static const struct ElfImage img_template = { 6682 .ehdr = { 6683 .e_ident[EI_MAG0] = ELFMAG0, 6684 .e_ident[EI_MAG1] = ELFMAG1, 6685 .e_ident[EI_MAG2] = ELFMAG2, 6686 .e_ident[EI_MAG3] = 
ELFMAG3, 6687 .e_ident[EI_CLASS] = ELF_CLASS, 6688 .e_ident[EI_DATA] = ELF_DATA, 6689 .e_ident[EI_VERSION] = EV_CURRENT, 6690 .e_type = ET_EXEC, 6691 .e_machine = ELF_HOST_MACHINE, 6692 .e_version = EV_CURRENT, 6693 .e_phoff = offsetof(struct ElfImage, phdr), 6694 .e_shoff = offsetof(struct ElfImage, shdr), 6695 .e_ehsize = sizeof(ElfW(Shdr)), 6696 .e_phentsize = sizeof(ElfW(Phdr)), 6697 .e_phnum = 1, 6698 .e_shentsize = sizeof(ElfW(Shdr)), 6699 .e_shnum = ARRAY_SIZE(img->shdr), 6700 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6701 #ifdef ELF_HOST_FLAGS 6702 .e_flags = ELF_HOST_FLAGS, 6703 #endif 6704 #ifdef ELF_OSABI 6705 .e_ident[EI_OSABI] = ELF_OSABI, 6706 #endif 6707 }, 6708 .phdr = { 6709 .p_type = PT_LOAD, 6710 .p_flags = PF_X, 6711 }, 6712 .shdr = { 6713 [0] = { .sh_type = SHT_NULL }, 6714 /* Trick: The contents of code_gen_buffer are not present in 6715 this fake ELF file; that got allocated elsewhere. Therefore 6716 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6717 will not look for contents. We can record any address. 
*/ 6718 [1] = { /* .text */ 6719 .sh_type = SHT_NOBITS, 6720 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6721 }, 6722 [2] = { /* .debug_info */ 6723 .sh_type = SHT_PROGBITS, 6724 .sh_offset = offsetof(struct ElfImage, di), 6725 .sh_size = sizeof(struct DebugInfo), 6726 }, 6727 [3] = { /* .debug_abbrev */ 6728 .sh_type = SHT_PROGBITS, 6729 .sh_offset = offsetof(struct ElfImage, da), 6730 .sh_size = sizeof(img->da), 6731 }, 6732 [4] = { /* .debug_frame */ 6733 .sh_type = SHT_PROGBITS, 6734 .sh_offset = sizeof(struct ElfImage), 6735 }, 6736 [5] = { /* .symtab */ 6737 .sh_type = SHT_SYMTAB, 6738 .sh_offset = offsetof(struct ElfImage, sym), 6739 .sh_size = sizeof(img->sym), 6740 .sh_info = 1, 6741 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6742 .sh_entsize = sizeof(ElfW(Sym)), 6743 }, 6744 [6] = { /* .strtab */ 6745 .sh_type = SHT_STRTAB, 6746 .sh_offset = offsetof(struct ElfImage, str), 6747 .sh_size = sizeof(img->str), 6748 } 6749 }, 6750 .sym = { 6751 [1] = { /* code_gen_buffer */ 6752 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6753 .st_shndx = 1, 6754 } 6755 }, 6756 .di = { 6757 .len = sizeof(struct DebugInfo) - 4, 6758 .version = 2, 6759 .ptr_size = sizeof(void *), 6760 .cu_die = 1, 6761 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6762 .fn_die = 2, 6763 .fn_name = "code_gen_buffer" 6764 }, 6765 .da = { 6766 1, /* abbrev number (the cu) */ 6767 0x11, 1, /* DW_TAG_compile_unit, has children */ 6768 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6769 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6770 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6771 0, 0, /* end of abbrev */ 6772 2, /* abbrev number (the fn) */ 6773 0x2e, 0, /* DW_TAG_subprogram, no children */ 6774 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6775 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6776 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6777 0, 0, /* end of abbrev */ 6778 0 /* no more abbrev */ 6779 }, 6780 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6781 ".debug_frame\0" ".symtab\0" 
".strtab\0" "code_gen_buffer", 6782 }; 6783 6784 /* We only need a single jit entry; statically allocate it. */ 6785 static struct jit_code_entry one_entry; 6786 6787 uintptr_t buf = (uintptr_t)buf_ptr; 6788 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6789 DebugFrameHeader *dfh; 6790 6791 img = g_malloc(img_size); 6792 *img = img_template; 6793 6794 img->phdr.p_vaddr = buf; 6795 img->phdr.p_paddr = buf; 6796 img->phdr.p_memsz = buf_size; 6797 6798 img->shdr[1].sh_name = find_string(img->str, ".text"); 6799 img->shdr[1].sh_addr = buf; 6800 img->shdr[1].sh_size = buf_size; 6801 6802 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6803 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6804 6805 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6806 img->shdr[4].sh_size = debug_frame_size; 6807 6808 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6809 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6810 6811 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6812 img->sym[1].st_value = buf; 6813 img->sym[1].st_size = buf_size; 6814 6815 img->di.cu_low_pc = buf; 6816 img->di.cu_high_pc = buf + buf_size; 6817 img->di.fn_low_pc = buf; 6818 img->di.fn_high_pc = buf + buf_size; 6819 6820 dfh = (DebugFrameHeader *)(img + 1); 6821 memcpy(dfh, debug_frame, debug_frame_size); 6822 dfh->fde.func_start = buf; 6823 dfh->fde.func_len = buf_size; 6824 6825 #ifdef DEBUG_JIT 6826 /* Enable this block to be able to debug the ELF image file creation. 6827 One can use readelf, objdump, or other inspection utilities. */ 6828 { 6829 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6830 FILE *f = fopen(jit, "w+b"); 6831 if (f) { 6832 if (fwrite(img, img_size, 1, f) != img_size) { 6833 /* Avoid stupid unused return value warning for fwrite. 
*/
6834             }
6835             fclose(f);
6836         }
6837     }
6838 #endif /* DEBUG_JIT */
6839
/* Publish the image: store its address and size in the single statically
   allocated entry, set the descriptor's action to JIT_REGISTER_FN, point
   both relevant_entry and first_entry at that entry, and finally call
   __jit_debug_register_code(). */
6840     one_entry.symfile_addr = img;
6841     one_entry.symfile_size = img_size;
6842
6843     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6844     __jit_debug_descriptor.relevant_entry = &one_entry;
6845     __jit_debug_descriptor.first_entry = &one_entry;
6846     __jit_debug_register_code();
6847 }
6848 #else
6849 /* No support for the feature. Provide the entry point expected by exec.c,
6850 and implement the internal function we declared earlier. */
6851
/* Empty stub for this branch: the body contains no statements, so buf, size,
   debug_frame and debug_frame_size are all ignored.
   NOTE(review): per the #endif comment below, this branch is presumably
   selected when ELF_HOST_MACHINE is not defined -- the opening conditional
   is outside this chunk, confirm. */
6852 static void tcg_register_jit_int(const void *buf, size_t size,
6853                                  const void *debug_frame,
6854                                  size_t debug_frame_size)
6855 {
6856 }
6857
/* Empty stub for the non-static entry point in this branch: buf and buf_size
   are ignored and nothing is registered. */
6858 void tcg_register_jit(const void *buf, size_t buf_size)
6859 {
6860 }
6861 #endif /* ELF_HOST_MACHINE */
6862
6863 #if !TCG_TARGET_MAYBE_vec
/* Fallback definition compiled only when TCG_TARGET_MAYBE_vec evaluates to
   zero.  None of the arguments are examined; the body is solely
   g_assert_not_reached(), i.e. a call to this function is treated as a
   "cannot happen" condition.
   NOTE(review): presumably provided so that references to tcg_expand_vec_op
   still link on hosts without vector support -- confirm. */
6864 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6865 {
6866     g_assert_not_reached();
6867 }
6868 #endif /* !TCG_TARGET_MAYBE_vec */
6869