1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 27 /* Define to jump the ELF file used to communicate with GDB. */ 28 #undef DEBUG_JIT 29 30 #include "qemu/error-report.h" 31 #include "qemu/cutils.h" 32 #include "qemu/host-utils.h" 33 #include "qemu/qemu-print.h" 34 #include "qemu/cacheflush.h" 35 #include "qemu/cacheinfo.h" 36 #include "qemu/timer.h" 37 #include "exec/translation-block.h" 38 #include "exec/tlb-common.h" 39 #include "tcg/startup.h" 40 #include "tcg/tcg-op-common.h" 41 42 #if UINTPTR_MAX == UINT32_MAX 43 # define ELF_CLASS ELFCLASS32 44 #else 45 # define ELF_CLASS ELFCLASS64 46 #endif 47 #if HOST_BIG_ENDIAN 48 # define ELF_DATA ELFDATA2MSB 49 #else 50 # define ELF_DATA ELFDATA2LSB 51 #endif 52 53 #include "elf.h" 54 #include "exec/log.h" 55 #include "tcg/tcg-ldst.h" 56 #include "tcg/tcg-temp-internal.h" 57 #include "tcg-internal.h" 58 #include "tcg/perf.h" 59 #include "tcg-has.h" 60 #ifdef CONFIG_USER_ONLY 61 #include "user/guest-base.h" 62 #endif 63 64 /* Forward declarations for functions declared in tcg-target.c.inc and 65 used here. */ 66 static void tcg_target_init(TCGContext *s); 67 static void tcg_target_qemu_prologue(TCGContext *s); 68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 69 intptr_t value, intptr_t addend); 70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count); 71 72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst; 73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); 75 76 /* The CIE and FDE header definitions will be common to all hosts. 
*/ 77 typedef struct { 78 uint32_t len __attribute__((aligned((sizeof(void *))))); 79 uint32_t id; 80 uint8_t version; 81 char augmentation[1]; 82 uint8_t code_align; 83 uint8_t data_align; 84 uint8_t return_column; 85 } DebugFrameCIE; 86 87 typedef struct QEMU_PACKED { 88 uint32_t len __attribute__((aligned((sizeof(void *))))); 89 uint32_t cie_offset; 90 uintptr_t func_start; 91 uintptr_t func_len; 92 } DebugFrameFDEHeader; 93 94 typedef struct QEMU_PACKED { 95 DebugFrameCIE cie; 96 DebugFrameFDEHeader fde; 97 } DebugFrameHeader; 98 99 struct TCGLabelQemuLdst { 100 bool is_ld; /* qemu_ld: true, qemu_st: false */ 101 MemOpIdx oi; 102 TCGType type; /* result type of a load */ 103 TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */ 104 TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */ 105 TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ 106 TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ 107 const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ 108 tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ 109 QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; 110 }; 111 112 static void tcg_register_jit_int(const void *buf, size_t size, 113 const void *debug_frame, 114 size_t debug_frame_size) 115 __attribute__((unused)); 116 117 /* Forward declarations for functions declared and used in tcg-target.c.inc. */ 118 static void tcg_out_tb_start(TCGContext *s); 119 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 120 intptr_t arg2); 121 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 122 static void tcg_out_movi(TCGContext *s, TCGType type, 123 TCGReg ret, tcg_target_long arg); 124 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 125 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 126 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg); 127 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg); 128 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg); 129 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg); 130 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 131 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 132 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); 133 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); 134 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); 135 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); 136 static void tcg_out_goto_tb(TCGContext *s, int which); 137 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, 138 const TCGArg args[TCG_MAX_OP_ARGS], 139 const int const_args[TCG_MAX_OP_ARGS]); 140 #if TCG_TARGET_MAYBE_vec 141 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 142 TCGReg dst, TCGReg src); 143 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 144 TCGReg dst, TCGReg base, intptr_t offset); 145 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 146 TCGReg dst, int64_t arg); 147 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 148 unsigned vecl, unsigned vece, 149 const TCGArg args[TCG_MAX_OP_ARGS], 150 const int const_args[TCG_MAX_OP_ARGS]); 151 #else 152 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 153 TCGReg dst, TCGReg src) 154 { 155 
g_assert_not_reached(); 156 } 157 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 158 TCGReg dst, TCGReg base, intptr_t offset) 159 { 160 g_assert_not_reached(); 161 } 162 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 163 TCGReg dst, int64_t arg) 164 { 165 g_assert_not_reached(); 166 } 167 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 168 unsigned vecl, unsigned vece, 169 const TCGArg args[TCG_MAX_OP_ARGS], 170 const int const_args[TCG_MAX_OP_ARGS]) 171 { 172 g_assert_not_reached(); 173 } 174 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) 175 { 176 return 0; 177 } 178 #endif 179 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 180 intptr_t arg2); 181 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 182 TCGReg base, intptr_t ofs); 183 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 184 const TCGHelperInfo *info); 185 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); 186 static bool tcg_target_const_match(int64_t val, int ct, 187 TCGType type, TCGCond cond, int vece); 188 189 #ifndef CONFIG_USER_ONLY 190 #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) 191 #endif 192 193 typedef struct TCGLdstHelperParam { 194 TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg); 195 unsigned ntmp; 196 int tmp[3]; 197 } TCGLdstHelperParam; 198 199 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 200 const TCGLdstHelperParam *p) 201 __attribute__((unused)); 202 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l, 203 bool load_sign, const TCGLdstHelperParam *p) 204 __attribute__((unused)); 205 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 206 const TCGLdstHelperParam *p) 207 __attribute__((unused)); 208 209 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = { 210 [MO_UB] = helper_ldub_mmu, 211 [MO_SB] = helper_ldsb_mmu, 212 [MO_UW] = helper_lduw_mmu, 213 [MO_SW] = helper_ldsw_mmu, 214 [MO_UL] = helper_ldul_mmu, 215 [MO_UQ] = helper_ldq_mmu, 216 #if TCG_TARGET_REG_BITS == 64 217 [MO_SL] = helper_ldsl_mmu, 218 [MO_128] = helper_ld16_mmu, 219 #endif 220 }; 221 222 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = { 223 [MO_8] = helper_stb_mmu, 224 [MO_16] = helper_stw_mmu, 225 [MO_32] = helper_stl_mmu, 226 [MO_64] = helper_stq_mmu, 227 #if TCG_TARGET_REG_BITS == 64 228 [MO_128] = helper_st16_mmu, 229 #endif 230 }; 231 232 typedef struct { 233 MemOp atom; /* lg2 bits of atomicity required */ 234 MemOp align; /* lg2 bits of alignment to use */ 235 } TCGAtomAlign; 236 237 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 238 MemOp host_atom, bool allow_two_ops) 239 __attribute__((unused)); 240 241 #ifdef CONFIG_USER_ONLY 242 bool tcg_use_softmmu; 243 #endif 244 245 TCGContext tcg_init_ctx; 246 __thread TCGContext *tcg_ctx; 247 248 TCGContext **tcg_ctxs; 249 unsigned int tcg_cur_ctxs; 250 unsigned int tcg_max_ctxs; 251 TCGv_env tcg_env; 252 const void *tcg_code_gen_epilogue; 253 uintptr_t tcg_splitwx_diff; 254 255 #ifndef CONFIG_TCG_INTERPRETER 256 tcg_prologue_fn *tcg_qemu_tb_exec; 257 #endif 258 259 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 260 static TCGRegSet tcg_target_call_clobber_regs; 261 262 #if TCG_TARGET_INSN_UNIT_SIZE == 1 263 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 264 { 265 
*s->code_ptr++ = v; 266 } 267 268 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 269 uint8_t v) 270 { 271 *p = v; 272 } 273 #endif 274 275 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 276 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 277 { 278 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 279 *s->code_ptr++ = v; 280 } else { 281 tcg_insn_unit *p = s->code_ptr; 282 memcpy(p, &v, sizeof(v)); 283 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 284 } 285 } 286 287 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 288 uint16_t v) 289 { 290 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 291 *p = v; 292 } else { 293 memcpy(p, &v, sizeof(v)); 294 } 295 } 296 #endif 297 298 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 299 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 300 { 301 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 302 *s->code_ptr++ = v; 303 } else { 304 tcg_insn_unit *p = s->code_ptr; 305 memcpy(p, &v, sizeof(v)); 306 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 307 } 308 } 309 310 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 311 uint32_t v) 312 { 313 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 314 *p = v; 315 } else { 316 memcpy(p, &v, sizeof(v)); 317 } 318 } 319 #endif 320 321 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 322 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 323 { 324 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 325 *s->code_ptr++ = v; 326 } else { 327 tcg_insn_unit *p = s->code_ptr; 328 memcpy(p, &v, sizeof(v)); 329 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 330 } 331 } 332 333 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 334 uint64_t v) 335 { 336 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 337 *p = v; 338 } else { 339 memcpy(p, &v, sizeof(v)); 340 } 341 } 342 #endif 343 344 /* label relocation processing */ 345 346 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 347 TCGLabel *l, intptr_t addend) 348 { 349 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 350 351 r->type = type; 352 r->ptr = code_ptr; 353 r->addend = addend; 354 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 355 } 356 357 static void tcg_out_label(TCGContext *s, TCGLabel *l) 358 { 359 tcg_debug_assert(!l->has_value); 360 l->has_value = 1; 361 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 362 } 363 364 TCGLabel *gen_new_label(void) 365 { 366 TCGContext *s = tcg_ctx; 367 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 368 369 memset(l, 0, sizeof(TCGLabel)); 370 l->id = s->nb_labels++; 371 QSIMPLEQ_INIT(&l->branches); 372 QSIMPLEQ_INIT(&l->relocs); 373 374 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 375 376 return l; 377 } 378 379 static bool tcg_resolve_relocs(TCGContext *s) 380 { 381 TCGLabel *l; 382 383 QSIMPLEQ_FOREACH(l, &s->labels, next) { 384 TCGRelocation *r; 385 uintptr_t value = l->u.value; 386 387 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 388 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 389 return false; 390 } 391 } 392 } 393 return true; 394 } 395 396 static void set_jmp_reset_offset(TCGContext *s, int which) 397 { 398 /* 399 * We will check for overflow at the end of the opcode loop in 400 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 
401 */ 402 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); 403 } 404 405 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) 406 { 407 /* 408 * We will check for overflow at the end of the opcode loop in 409 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 410 */ 411 s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); 412 } 413 414 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) 415 { 416 /* 417 * Return the read-execute version of the pointer, for the benefit 418 * of any pc-relative addressing mode. 419 */ 420 return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); 421 } 422 423 static int __attribute__((unused)) 424 tlb_mask_table_ofs(TCGContext *s, int which) 425 { 426 return (offsetof(CPUNegativeOffsetState, tlb.f[which]) - 427 sizeof(CPUNegativeOffsetState)); 428 } 429 430 /* Signal overflow, starting over with fewer guest insns. */ 431 static G_NORETURN 432 void tcg_raise_tb_overflow(TCGContext *s) 433 { 434 siglongjmp(s->jmp_trans, -2); 435 } 436 437 /* 438 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext. 439 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg. 440 * 441 * However, tcg_out_helper_load_slots reuses this field to hold an 442 * argument slot number (which may designate a argument register or an 443 * argument stack slot), converting to TCGReg once all arguments that 444 * are destined for the stack are processed. 445 */ 446 typedef struct TCGMovExtend { 447 unsigned dst; 448 TCGReg src; 449 TCGType dst_type; 450 TCGType src_type; 451 MemOp src_ext; 452 } TCGMovExtend; 453 454 /** 455 * tcg_out_movext -- move and extend 456 * @s: tcg context 457 * @dst_type: integral type for destination 458 * @dst: destination register 459 * @src_type: integral type for source 460 * @src_ext: extension to apply to source 461 * @src: source register 462 * 463 * Move or extend @src into @dst, depending on @src_ext and the types. 464 */ 465 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, 466 TCGType src_type, MemOp src_ext, TCGReg src) 467 { 468 switch (src_ext) { 469 case MO_UB: 470 tcg_out_ext8u(s, dst, src); 471 break; 472 case MO_SB: 473 tcg_out_ext8s(s, dst_type, dst, src); 474 break; 475 case MO_UW: 476 tcg_out_ext16u(s, dst, src); 477 break; 478 case MO_SW: 479 tcg_out_ext16s(s, dst_type, dst, src); 480 break; 481 case MO_UL: 482 case MO_SL: 483 if (dst_type == TCG_TYPE_I32) { 484 if (src_type == TCG_TYPE_I32) { 485 tcg_out_mov(s, TCG_TYPE_I32, dst, src); 486 } else { 487 tcg_out_extrl_i64_i32(s, dst, src); 488 } 489 } else if (src_type == TCG_TYPE_I32) { 490 if (src_ext & MO_SIGN) { 491 tcg_out_exts_i32_i64(s, dst, src); 492 } else { 493 tcg_out_extu_i32_i64(s, dst, src); 494 } 495 } else { 496 if (src_ext & MO_SIGN) { 497 tcg_out_ext32s(s, dst, src); 498 } else { 499 tcg_out_ext32u(s, dst, src); 500 } 501 } 502 break; 503 case MO_UQ: 504 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 505 if (dst_type == TCG_TYPE_I32) { 506 tcg_out_extrl_i64_i32(s, dst, src); 507 } else { 508 tcg_out_mov(s, TCG_TYPE_I64, dst, src); 509 } 510 break; 511 default: 512 g_assert_not_reached(); 513 } 514 } 515 516 /* Minor variations on a theme, using a structure. 
*/ 517 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, 518 TCGReg src) 519 { 520 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); 521 } 522 523 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) 524 { 525 tcg_out_movext1_new_src(s, i, i->src); 526 } 527 528 /** 529 * tcg_out_movext2 -- move and extend two pair 530 * @s: tcg context 531 * @i1: first move description 532 * @i2: second move description 533 * @scratch: temporary register, or -1 for none 534 * 535 * As tcg_out_movext, for both @i1 and @i2, caring for overlap 536 * between the sources and destinations. 537 */ 538 539 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, 540 const TCGMovExtend *i2, int scratch) 541 { 542 TCGReg src1 = i1->src; 543 TCGReg src2 = i2->src; 544 545 if (i1->dst != src2) { 546 tcg_out_movext1(s, i1); 547 tcg_out_movext1(s, i2); 548 return; 549 } 550 if (i2->dst == src1) { 551 TCGType src1_type = i1->src_type; 552 TCGType src2_type = i2->src_type; 553 554 if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { 555 /* The data is now in the correct registers, now extend. */ 556 src1 = i2->src; 557 src2 = i1->src; 558 } else { 559 tcg_debug_assert(scratch >= 0); 560 tcg_out_mov(s, src1_type, scratch, src1); 561 src1 = scratch; 562 } 563 } 564 tcg_out_movext1_new_src(s, i2, src2); 565 tcg_out_movext1_new_src(s, i1, src1); 566 } 567 568 /** 569 * tcg_out_movext3 -- move and extend three pair 570 * @s: tcg context 571 * @i1: first move description 572 * @i2: second move description 573 * @i3: third move description 574 * @scratch: temporary register, or -1 for none 575 * 576 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap 577 * between the sources and destinations. 578 */ 579 580 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, 581 const TCGMovExtend *i2, const TCGMovExtend *i3, 582 int scratch) 583 { 584 TCGReg src1 = i1->src; 585 TCGReg src2 = i2->src; 586 TCGReg src3 = i3->src; 587 588 if (i1->dst != src2 && i1->dst != src3) { 589 tcg_out_movext1(s, i1); 590 tcg_out_movext2(s, i2, i3, scratch); 591 return; 592 } 593 if (i2->dst != src1 && i2->dst != src3) { 594 tcg_out_movext1(s, i2); 595 tcg_out_movext2(s, i1, i3, scratch); 596 return; 597 } 598 if (i3->dst != src1 && i3->dst != src2) { 599 tcg_out_movext1(s, i3); 600 tcg_out_movext2(s, i1, i2, scratch); 601 return; 602 } 603 604 /* 605 * There is a cycle. Since there are only 3 nodes, the cycle is 606 * either "clockwise" or "anti-clockwise", and can be solved with 607 * a single scratch or two xchg. 608 */ 609 if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) { 610 /* "Clockwise" */ 611 if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) { 612 tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3); 613 /* The data is now in the correct registers, now extend. */ 614 tcg_out_movext1_new_src(s, i1, i1->dst); 615 tcg_out_movext1_new_src(s, i2, i2->dst); 616 tcg_out_movext1_new_src(s, i3, i3->dst); 617 } else { 618 tcg_debug_assert(scratch >= 0); 619 tcg_out_mov(s, i1->src_type, scratch, src1); 620 tcg_out_movext1(s, i3); 621 tcg_out_movext1(s, i2); 622 tcg_out_movext1_new_src(s, i1, scratch); 623 } 624 } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) { 625 /* "Anti-clockwise" */ 626 if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) { 627 tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2); 628 /* The data is now in the correct registers, now extend. 
*/ 629 tcg_out_movext1_new_src(s, i1, i1->dst); 630 tcg_out_movext1_new_src(s, i2, i2->dst); 631 tcg_out_movext1_new_src(s, i3, i3->dst); 632 } else { 633 tcg_debug_assert(scratch >= 0); 634 tcg_out_mov(s, i1->src_type, scratch, src1); 635 tcg_out_movext1(s, i2); 636 tcg_out_movext1(s, i3); 637 tcg_out_movext1_new_src(s, i1, scratch); 638 } 639 } else { 640 g_assert_not_reached(); 641 } 642 } 643 644 /* 645 * Allocate a new TCGLabelQemuLdst entry. 646 */ 647 648 __attribute__((unused)) 649 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) 650 { 651 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); 652 653 memset(l, 0, sizeof(*l)); 654 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); 655 656 return l; 657 } 658 659 /* 660 * Allocate new constant pool entries. 661 */ 662 663 typedef struct TCGLabelPoolData { 664 struct TCGLabelPoolData *next; 665 tcg_insn_unit *label; 666 intptr_t addend; 667 int rtype; 668 unsigned nlong; 669 tcg_target_ulong data[]; 670 } TCGLabelPoolData; 671 672 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, 673 tcg_insn_unit *label, intptr_t addend) 674 { 675 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) 676 + sizeof(tcg_target_ulong) * nlong); 677 678 n->label = label; 679 n->addend = addend; 680 n->rtype = rtype; 681 n->nlong = nlong; 682 return n; 683 } 684 685 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) 686 { 687 TCGLabelPoolData *i, **pp; 688 int nlong = n->nlong; 689 690 /* Insertion sort on the pool. */ 691 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { 692 if (nlong > i->nlong) { 693 break; 694 } 695 if (nlong < i->nlong) { 696 continue; 697 } 698 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { 699 break; 700 } 701 } 702 n->next = *pp; 703 *pp = n; 704 } 705 706 /* The "usual" for generic integer code. */ 707 __attribute__((unused)) 708 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, 709 tcg_insn_unit *label, intptr_t addend) 710 { 711 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); 712 n->data[0] = d; 713 new_pool_insert(s, n); 714 } 715 716 /* For v64 or v128, depending on the host. */ 717 __attribute__((unused)) 718 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, 719 intptr_t addend, tcg_target_ulong d0, 720 tcg_target_ulong d1) 721 { 722 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); 723 n->data[0] = d0; 724 n->data[1] = d1; 725 new_pool_insert(s, n); 726 } 727 728 /* For v128 or v256, depending on the host. */ 729 __attribute__((unused)) 730 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, 731 intptr_t addend, tcg_target_ulong d0, 732 tcg_target_ulong d1, tcg_target_ulong d2, 733 tcg_target_ulong d3) 734 { 735 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); 736 n->data[0] = d0; 737 n->data[1] = d1; 738 n->data[2] = d2; 739 n->data[3] = d3; 740 new_pool_insert(s, n); 741 } 742 743 /* For v256, for 32-bit host. 
*/ 744 __attribute__((unused)) 745 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, 746 intptr_t addend, tcg_target_ulong d0, 747 tcg_target_ulong d1, tcg_target_ulong d2, 748 tcg_target_ulong d3, tcg_target_ulong d4, 749 tcg_target_ulong d5, tcg_target_ulong d6, 750 tcg_target_ulong d7) 751 { 752 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); 753 n->data[0] = d0; 754 n->data[1] = d1; 755 n->data[2] = d2; 756 n->data[3] = d3; 757 n->data[4] = d4; 758 n->data[5] = d5; 759 n->data[6] = d6; 760 n->data[7] = d7; 761 new_pool_insert(s, n); 762 } 763 764 /* 765 * Generate TB finalization at the end of block 766 */ 767 768 static int tcg_out_ldst_finalize(TCGContext *s) 769 { 770 TCGLabelQemuLdst *lb; 771 772 /* qemu_ld/st slow paths */ 773 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 774 if (lb->is_ld 775 ? !tcg_out_qemu_ld_slow_path(s, lb) 776 : !tcg_out_qemu_st_slow_path(s, lb)) { 777 return -2; 778 } 779 780 /* 781 * Test for (pending) buffer overflow. The assumption is that any 782 * one operation beginning below the high water mark cannot overrun 783 * the buffer completely. Thus we can test for overflow after 784 * generating code without having to check during generation. 785 */ 786 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 787 return -1; 788 } 789 } 790 return 0; 791 } 792 793 static int tcg_out_pool_finalize(TCGContext *s) 794 { 795 TCGLabelPoolData *p = s->pool_labels; 796 TCGLabelPoolData *l = NULL; 797 void *a; 798 799 if (p == NULL) { 800 return 0; 801 } 802 803 /* 804 * ??? Round up to qemu_icache_linesize, but then do not round 805 * again when allocating the next TranslationBlock structure. 806 */ 807 a = (void *)ROUND_UP((uintptr_t)s->code_ptr, 808 sizeof(tcg_target_ulong) * p->nlong); 809 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); 810 s->data_gen_ptr = a; 811 812 for (; p != NULL; p = p->next) { 813 size_t size = sizeof(tcg_target_ulong) * p->nlong; 814 uintptr_t value; 815 816 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 817 if (unlikely(a > s->code_gen_highwater)) { 818 return -1; 819 } 820 memcpy(a, p->data, size); 821 a += size; 822 l = p; 823 } 824 825 value = (uintptr_t)tcg_splitwx_to_rx(a) - size; 826 if (!patch_reloc(p->label, p->rtype, value, p->addend)) { 827 return -2; 828 } 829 } 830 831 s->code_ptr = a; 832 return 0; 833 } 834 835 #define C_PFX1(P, A) P##A 836 #define C_PFX2(P, A, B) P##A##_##B 837 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 838 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 839 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 840 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 841 842 /* Define an enumeration for the various combinations. 
*/ 843 844 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1), 845 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2), 846 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3), 847 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4), 848 849 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1), 850 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2), 851 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), 852 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), 853 854 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), 855 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1), 856 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), 857 858 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), 859 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), 860 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), 861 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), 862 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), 863 864 typedef enum { 865 C_NotImplemented = -1, 866 #include "tcg-target-con-set.h" 867 } TCGConstraintSetIndex; 868 869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned); 870 871 #undef C_O0_I1 872 #undef C_O0_I2 873 #undef C_O0_I3 874 #undef C_O0_I4 875 #undef C_O1_I1 876 #undef C_O1_I2 877 #undef C_O1_I3 878 #undef C_O1_I4 879 #undef C_N1_I2 880 #undef C_N1O1_I1 881 #undef C_N2_I1 882 #undef C_O2_I1 883 #undef C_O2_I2 884 #undef C_O2_I3 885 #undef C_O2_I4 886 #undef C_N1_O1_I4 887 888 /* Put all of the constraint sets into an array, indexed by the enum. */ 889 890 typedef struct TCGConstraintSet { 891 uint8_t nb_oargs, nb_iargs; 892 const char *args_ct_str[TCG_MAX_OP_ARGS]; 893 } TCGConstraintSet; 894 895 #define C_O0_I1(I1) { 0, 1, { #I1 } }, 896 #define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } }, 897 #define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } }, 898 #define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } }, 899 900 #define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } }, 901 #define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } }, 902 #define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } }, 903 #define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } }, 904 905 #define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } }, 906 #define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } }, 907 #define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } }, 908 909 #define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } }, 910 #define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } }, 911 #define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } }, 912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } }, 913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, 914 915 static const TCGConstraintSet constraint_sets[] = { 916 #include "tcg-target-con-set.h" 917 }; 918 919 #undef C_O0_I1 920 #undef C_O0_I2 921 #undef C_O0_I3 922 #undef C_O0_I4 923 #undef C_O1_I1 924 #undef C_O1_I2 925 #undef C_O1_I3 926 #undef C_O1_I4 927 #undef C_N1_I2 928 #undef C_N1O1_I1 929 #undef C_N2_I1 930 #undef C_O2_I1 931 #undef C_O2_I2 932 #undef C_O2_I3 933 #undef C_O2_I4 934 #undef C_N1_O1_I4 935 936 /* Expand the enumerator to be returned from tcg_target_op_def(). 
*/ 937 938 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1) 939 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2) 940 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3) 941 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4) 942 943 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1) 944 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2) 945 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3) 946 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) 947 948 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) 949 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1) 950 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1) 951 952 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) 953 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) 954 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) 955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) 956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) 957 958 #include "tcg-target.c.inc" 959 960 #ifndef CONFIG_TCG_INTERPRETER 961 /* Validate CPUTLBDescFast placement. */ 962 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - 963 sizeof(CPUNegativeOffsetState)) 964 < MIN_TLB_MASK_TABLE_OFS); 965 #endif 966 967 /* 968 * All TCG threads except the parent (i.e. the one that called tcg_context_init 969 * and registered the target's TCG globals) must register with this function 970 * before initiating translation. 971 * 972 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 973 * of tcg_region_init() for the reasoning behind this. 974 * 975 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in 976 * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context 977 * is not used anymore for translation once this function is called. 978 * 979 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that 980 * iterates over the array (e.g. tcg_code_size() the same for both system/user 981 * modes. 982 */ 983 #ifdef CONFIG_USER_ONLY 984 void tcg_register_thread(void) 985 { 986 tcg_ctx = &tcg_init_ctx; 987 } 988 #else 989 void tcg_register_thread(void) 990 { 991 TCGContext *s = g_malloc(sizeof(*s)); 992 unsigned int i, n; 993 994 *s = tcg_init_ctx; 995 996 /* Relink mem_base. 
*/ 997 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 998 if (tcg_init_ctx.temps[i].mem_base) { 999 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 1000 tcg_debug_assert(b >= 0 && b < n); 1001 s->temps[i].mem_base = &s->temps[b]; 1002 } 1003 } 1004 1005 /* Claim an entry in tcg_ctxs */ 1006 n = qatomic_fetch_inc(&tcg_cur_ctxs); 1007 g_assert(n < tcg_max_ctxs); 1008 qatomic_set(&tcg_ctxs[n], s); 1009 1010 if (n > 0) { 1011 tcg_region_initial_alloc(s); 1012 } 1013 1014 tcg_ctx = s; 1015 } 1016 #endif /* !CONFIG_USER_ONLY */ 1017 1018 /* pool based memory allocation */ 1019 void *tcg_malloc_internal(TCGContext *s, int size) 1020 { 1021 TCGPool *p; 1022 int pool_size; 1023 1024 if (size > TCG_POOL_CHUNK_SIZE) { 1025 /* big malloc: insert a new pool (XXX: could optimize) */ 1026 p = g_malloc(sizeof(TCGPool) + size); 1027 p->size = size; 1028 p->next = s->pool_first_large; 1029 s->pool_first_large = p; 1030 return p->data; 1031 } else { 1032 p = s->pool_current; 1033 if (!p) { 1034 p = s->pool_first; 1035 if (!p) 1036 goto new_pool; 1037 } else { 1038 if (!p->next) { 1039 new_pool: 1040 pool_size = TCG_POOL_CHUNK_SIZE; 1041 p = g_malloc(sizeof(TCGPool) + pool_size); 1042 p->size = pool_size; 1043 p->next = NULL; 1044 if (s->pool_current) { 1045 s->pool_current->next = p; 1046 } else { 1047 s->pool_first = p; 1048 } 1049 } else { 1050 p = p->next; 1051 } 1052 } 1053 } 1054 s->pool_current = p; 1055 s->pool_cur = p->data + size; 1056 s->pool_end = p->data + p->size; 1057 return p->data; 1058 } 1059 1060 void tcg_pool_reset(TCGContext *s) 1061 { 1062 TCGPool *p, *t; 1063 for (p = s->pool_first_large; p; p = t) { 1064 t = p->next; 1065 g_free(p); 1066 } 1067 s->pool_first_large = NULL; 1068 s->pool_cur = s->pool_end = NULL; 1069 s->pool_current = NULL; 1070 } 1071 1072 /* 1073 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions, 1074 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N. 1075 * We only use these for layout in tcg_out_ld_helper_ret and 1076 * tcg_out_st_helper_args, and share them between several of 1077 * the helpers, with the end result that it's easier to build manually. 
1078 */ 1079 1080 #if TCG_TARGET_REG_BITS == 32 1081 # define dh_typecode_ttl dh_typecode_i32 1082 #else 1083 # define dh_typecode_ttl dh_typecode_i64 1084 #endif 1085 1086 static TCGHelperInfo info_helper_ld32_mmu = { 1087 .flags = TCG_CALL_NO_WG, 1088 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1089 | dh_typemask(env, 1) 1090 | dh_typemask(i64, 2) /* uint64_t addr */ 1091 | dh_typemask(i32, 3) /* unsigned oi */ 1092 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1093 }; 1094 1095 static TCGHelperInfo info_helper_ld64_mmu = { 1096 .flags = TCG_CALL_NO_WG, 1097 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1098 | dh_typemask(env, 1) 1099 | dh_typemask(i64, 2) /* uint64_t addr */ 1100 | dh_typemask(i32, 3) /* unsigned oi */ 1101 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1102 }; 1103 1104 static TCGHelperInfo info_helper_ld128_mmu = { 1105 .flags = TCG_CALL_NO_WG, 1106 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1107 | dh_typemask(env, 1) 1108 | dh_typemask(i64, 2) /* uint64_t addr */ 1109 | dh_typemask(i32, 3) /* unsigned oi */ 1110 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1111 }; 1112 1113 static TCGHelperInfo info_helper_st32_mmu = { 1114 .flags = TCG_CALL_NO_WG, 1115 .typemask = dh_typemask(void, 0) 1116 | dh_typemask(env, 1) 1117 | dh_typemask(i64, 2) /* uint64_t addr */ 1118 | dh_typemask(i32, 3) /* uint32_t data */ 1119 | dh_typemask(i32, 4) /* unsigned oi */ 1120 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1121 }; 1122 1123 static TCGHelperInfo info_helper_st64_mmu = { 1124 .flags = TCG_CALL_NO_WG, 1125 .typemask = dh_typemask(void, 0) 1126 | dh_typemask(env, 1) 1127 | dh_typemask(i64, 2) /* uint64_t addr */ 1128 | dh_typemask(i64, 3) /* uint64_t data */ 1129 | dh_typemask(i32, 4) /* unsigned oi */ 1130 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1131 }; 1132 1133 static TCGHelperInfo info_helper_st128_mmu = { 1134 .flags = TCG_CALL_NO_WG, 1135 .typemask = dh_typemask(void, 0) 1136 | dh_typemask(env, 1) 1137 | dh_typemask(i64, 2) /* uint64_t addr */ 1138 | dh_typemask(i128, 3) /* Int128 data */ 1139 | dh_typemask(i32, 4) /* unsigned oi */ 1140 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1141 }; 1142 1143 #ifdef CONFIG_TCG_INTERPRETER 1144 static ffi_type *typecode_to_ffi(int argmask) 1145 { 1146 /* 1147 * libffi does not support __int128_t, so we have forced Int128 1148 * to use the structure definition instead of the builtin type. 1149 */ 1150 static ffi_type *ffi_type_i128_elements[3] = { 1151 &ffi_type_uint64, 1152 &ffi_type_uint64, 1153 NULL 1154 }; 1155 static ffi_type ffi_type_i128 = { 1156 .size = 16, 1157 .alignment = __alignof__(Int128), 1158 .type = FFI_TYPE_STRUCT, 1159 .elements = ffi_type_i128_elements, 1160 }; 1161 1162 switch (argmask) { 1163 case dh_typecode_void: 1164 return &ffi_type_void; 1165 case dh_typecode_i32: 1166 return &ffi_type_uint32; 1167 case dh_typecode_s32: 1168 return &ffi_type_sint32; 1169 case dh_typecode_i64: 1170 return &ffi_type_uint64; 1171 case dh_typecode_s64: 1172 return &ffi_type_sint64; 1173 case dh_typecode_ptr: 1174 return &ffi_type_pointer; 1175 case dh_typecode_i128: 1176 return &ffi_type_i128; 1177 } 1178 g_assert_not_reached(); 1179 } 1180 1181 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1182 { 1183 unsigned typemask = info->typemask; 1184 struct { 1185 ffi_cif cif; 1186 ffi_type *args[]; 1187 } *ca; 1188 ffi_status status; 1189 int nargs; 1190 1191 /* Ignoring the return type, find the last non-zero field. 
*/ 1192 nargs = 32 - clz32(typemask >> 3); 1193 nargs = DIV_ROUND_UP(nargs, 3); 1194 assert(nargs <= MAX_CALL_IARGS); 1195 1196 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 1197 ca->cif.rtype = typecode_to_ffi(typemask & 7); 1198 ca->cif.nargs = nargs; 1199 1200 if (nargs != 0) { 1201 ca->cif.arg_types = ca->args; 1202 for (int j = 0; j < nargs; ++j) { 1203 int typecode = extract32(typemask, (j + 1) * 3, 3); 1204 ca->args[j] = typecode_to_ffi(typecode); 1205 } 1206 } 1207 1208 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 1209 ca->cif.rtype, ca->cif.arg_types); 1210 assert(status == FFI_OK); 1211 1212 return &ca->cif; 1213 } 1214 1215 #define HELPER_INFO_INIT(I) (&(I)->cif) 1216 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I) 1217 #else 1218 #define HELPER_INFO_INIT(I) (&(I)->init) 1219 #define HELPER_INFO_INIT_VAL(I) 1 1220 #endif /* CONFIG_TCG_INTERPRETER */ 1221 1222 static inline bool arg_slot_reg_p(unsigned arg_slot) 1223 { 1224 /* 1225 * Split the sizeof away from the comparison to avoid Werror from 1226 * "unsigned < 0 is always false", when iarg_regs is empty. 1227 */ 1228 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs); 1229 return arg_slot < nreg; 1230 } 1231 1232 static inline int arg_slot_stk_ofs(unsigned arg_slot) 1233 { 1234 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1235 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 1236 1237 tcg_debug_assert(stk_slot < max); 1238 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long); 1239 } 1240 1241 typedef struct TCGCumulativeArgs { 1242 int arg_idx; /* tcg_gen_callN args[] */ 1243 int info_in_idx; /* TCGHelperInfo in[] */ 1244 int arg_slot; /* regs+stack slot */ 1245 int ref_slot; /* stack slots for references */ 1246 } TCGCumulativeArgs; 1247 1248 static void layout_arg_even(TCGCumulativeArgs *cum) 1249 { 1250 cum->arg_slot += cum->arg_slot & 1; 1251 } 1252 1253 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 1254 TCGCallArgumentKind kind) 1255 { 1256 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1257 1258 *loc = (TCGCallArgumentLoc){ 1259 .kind = kind, 1260 .arg_idx = cum->arg_idx, 1261 .arg_slot = cum->arg_slot, 1262 }; 1263 cum->info_in_idx++; 1264 cum->arg_slot++; 1265 } 1266 1267 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 1268 TCGHelperInfo *info, int n) 1269 { 1270 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1271 1272 for (int i = 0; i < n; ++i) { 1273 /* Layout all using the same arg_idx, adjusting the subindex. */ 1274 loc[i] = (TCGCallArgumentLoc){ 1275 .kind = TCG_CALL_ARG_NORMAL, 1276 .arg_idx = cum->arg_idx, 1277 .tmp_subindex = i, 1278 .arg_slot = cum->arg_slot + i, 1279 }; 1280 } 1281 cum->info_in_idx += n; 1282 cum->arg_slot += n; 1283 } 1284 1285 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 1286 { 1287 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1288 int n = 128 / TCG_TARGET_REG_BITS; 1289 1290 /* The first subindex carries the pointer. */ 1291 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 1292 1293 /* 1294 * The callee is allowed to clobber memory associated with 1295 * structure pass by-reference. Therefore we must make copies. 1296 * Allocate space from "ref_slot", which will be adjusted to 1297 * follow the parameters on the stack. 1298 */ 1299 loc[0].ref_slot = cum->ref_slot; 1300 1301 /* 1302 * Subsequent words also go into the reference slot, but 1303 * do not accumulate into the regular arguments. 
1304 */ 1305 for (int i = 1; i < n; ++i) { 1306 loc[i] = (TCGCallArgumentLoc){ 1307 .kind = TCG_CALL_ARG_BY_REF_N, 1308 .arg_idx = cum->arg_idx, 1309 .tmp_subindex = i, 1310 .ref_slot = cum->ref_slot + i, 1311 }; 1312 } 1313 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1314 cum->ref_slot += n; 1315 } 1316 1317 static void init_call_layout(TCGHelperInfo *info) 1318 { 1319 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1320 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1321 unsigned typemask = info->typemask; 1322 unsigned typecode; 1323 TCGCumulativeArgs cum = { }; 1324 1325 /* 1326 * Parse and place any function return value. 1327 */ 1328 typecode = typemask & 7; 1329 switch (typecode) { 1330 case dh_typecode_void: 1331 info->nr_out = 0; 1332 break; 1333 case dh_typecode_i32: 1334 case dh_typecode_s32: 1335 case dh_typecode_ptr: 1336 info->nr_out = 1; 1337 info->out_kind = TCG_CALL_RET_NORMAL; 1338 break; 1339 case dh_typecode_i64: 1340 case dh_typecode_s64: 1341 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1342 info->out_kind = TCG_CALL_RET_NORMAL; 1343 /* Query the last register now to trigger any assert early. */ 1344 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1345 break; 1346 case dh_typecode_i128: 1347 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1348 info->out_kind = TCG_TARGET_CALL_RET_I128; 1349 switch (TCG_TARGET_CALL_RET_I128) { 1350 case TCG_CALL_RET_NORMAL: 1351 /* Query the last register now to trigger any assert early. */ 1352 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1353 break; 1354 case TCG_CALL_RET_BY_VEC: 1355 /* Query the single register now to trigger any assert early. */ 1356 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1357 break; 1358 case TCG_CALL_RET_BY_REF: 1359 /* 1360 * Allocate the first argument to the output. 1361 * We don't need to store this anywhere, just make it 1362 * unavailable for use in the input loop below. 1363 */ 1364 cum.arg_slot = 1; 1365 break; 1366 default: 1367 qemu_build_not_reached(); 1368 } 1369 break; 1370 default: 1371 g_assert_not_reached(); 1372 } 1373 1374 /* 1375 * Parse and place function arguments. 
1376 */ 1377 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1378 TCGCallArgumentKind kind; 1379 TCGType type; 1380 1381 typecode = typemask & 7; 1382 switch (typecode) { 1383 case dh_typecode_i32: 1384 case dh_typecode_s32: 1385 type = TCG_TYPE_I32; 1386 break; 1387 case dh_typecode_i64: 1388 case dh_typecode_s64: 1389 type = TCG_TYPE_I64; 1390 break; 1391 case dh_typecode_ptr: 1392 type = TCG_TYPE_PTR; 1393 break; 1394 case dh_typecode_i128: 1395 type = TCG_TYPE_I128; 1396 break; 1397 default: 1398 g_assert_not_reached(); 1399 } 1400 1401 switch (type) { 1402 case TCG_TYPE_I32: 1403 switch (TCG_TARGET_CALL_ARG_I32) { 1404 case TCG_CALL_ARG_EVEN: 1405 layout_arg_even(&cum); 1406 /* fall through */ 1407 case TCG_CALL_ARG_NORMAL: 1408 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1409 break; 1410 case TCG_CALL_ARG_EXTEND: 1411 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1412 layout_arg_1(&cum, info, kind); 1413 break; 1414 default: 1415 qemu_build_not_reached(); 1416 } 1417 break; 1418 1419 case TCG_TYPE_I64: 1420 switch (TCG_TARGET_CALL_ARG_I64) { 1421 case TCG_CALL_ARG_EVEN: 1422 layout_arg_even(&cum); 1423 /* fall through */ 1424 case TCG_CALL_ARG_NORMAL: 1425 if (TCG_TARGET_REG_BITS == 32) { 1426 layout_arg_normal_n(&cum, info, 2); 1427 } else { 1428 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1429 } 1430 break; 1431 default: 1432 qemu_build_not_reached(); 1433 } 1434 break; 1435 1436 case TCG_TYPE_I128: 1437 switch (TCG_TARGET_CALL_ARG_I128) { 1438 case TCG_CALL_ARG_EVEN: 1439 layout_arg_even(&cum); 1440 /* fall through */ 1441 case TCG_CALL_ARG_NORMAL: 1442 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1443 break; 1444 case TCG_CALL_ARG_BY_REF: 1445 layout_arg_by_ref(&cum, info); 1446 break; 1447 default: 1448 qemu_build_not_reached(); 1449 } 1450 break; 1451 1452 default: 1453 g_assert_not_reached(); 1454 } 1455 } 1456 info->nr_in = cum.info_in_idx; 1457 1458 /* Validate that we didn't overrun the input array. */ 1459 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1460 /* Validate the backend has enough argument space. */ 1461 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1462 1463 /* 1464 * Relocate the "ref_slot" area to the end of the parameters. 1465 * Minimizing this stack offset helps code size for x86, 1466 * which has a signed 8-bit offset encoding. 
1467 */ 1468 if (cum.ref_slot != 0) { 1469 int ref_base = 0; 1470 1471 if (cum.arg_slot > max_reg_slots) { 1472 int align = __alignof(Int128) / sizeof(tcg_target_long); 1473 1474 ref_base = cum.arg_slot - max_reg_slots; 1475 if (align > 1) { 1476 ref_base = ROUND_UP(ref_base, align); 1477 } 1478 } 1479 assert(ref_base + cum.ref_slot <= max_stk_slots); 1480 ref_base += max_reg_slots; 1481 1482 if (ref_base != 0) { 1483 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1484 TCGCallArgumentLoc *loc = &info->in[i]; 1485 switch (loc->kind) { 1486 case TCG_CALL_ARG_BY_REF: 1487 case TCG_CALL_ARG_BY_REF_N: 1488 loc->ref_slot += ref_base; 1489 break; 1490 default: 1491 break; 1492 } 1493 } 1494 } 1495 } 1496 } 1497 1498 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1499 static void process_constraint_sets(void); 1500 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1501 TCGReg reg, const char *name); 1502 1503 static void tcg_context_init(unsigned max_cpus) 1504 { 1505 TCGContext *s = &tcg_init_ctx; 1506 int n, i; 1507 TCGTemp *ts; 1508 1509 memset(s, 0, sizeof(*s)); 1510 s->nb_globals = 0; 1511 1512 init_call_layout(&info_helper_ld32_mmu); 1513 init_call_layout(&info_helper_ld64_mmu); 1514 init_call_layout(&info_helper_ld128_mmu); 1515 init_call_layout(&info_helper_st32_mmu); 1516 init_call_layout(&info_helper_st64_mmu); 1517 init_call_layout(&info_helper_st128_mmu); 1518 1519 tcg_target_init(s); 1520 process_constraint_sets(); 1521 1522 /* Reverse the order of the saved registers, assuming they're all at 1523 the start of tcg_target_reg_alloc_order. */ 1524 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1525 int r = tcg_target_reg_alloc_order[n]; 1526 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1527 break; 1528 } 1529 } 1530 for (i = 0; i < n; ++i) { 1531 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1532 } 1533 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1534 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1535 } 1536 1537 tcg_ctx = s; 1538 /* 1539 * In user-mode we simply share the init context among threads, since we 1540 * use a single region. See the documentation tcg_region_init() for the 1541 * reasoning behind this. 1542 * In system-mode we will have at most max_cpus TCG threads. 1543 */ 1544 #ifdef CONFIG_USER_ONLY 1545 tcg_ctxs = &tcg_ctx; 1546 tcg_cur_ctxs = 1; 1547 tcg_max_ctxs = 1; 1548 #else 1549 tcg_max_ctxs = max_cpus; 1550 tcg_ctxs = g_new0(TCGContext *, max_cpus); 1551 #endif 1552 1553 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1554 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1555 tcg_env = temp_tcgv_ptr(ts); 1556 } 1557 1558 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) 1559 { 1560 tcg_context_init(max_cpus); 1561 tcg_region_init(tb_size, splitwx, max_cpus); 1562 } 1563 1564 /* 1565 * Allocate TBs right before their corresponding translated code, making 1566 * sure that TBs and code are on different cache lines. 
1567 */ 1568 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1569 { 1570 uintptr_t align = qemu_icache_linesize; 1571 TranslationBlock *tb; 1572 void *next; 1573 1574 retry: 1575 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1576 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1577 1578 if (unlikely(next > s->code_gen_highwater)) { 1579 if (tcg_region_alloc(s)) { 1580 return NULL; 1581 } 1582 goto retry; 1583 } 1584 qatomic_set(&s->code_gen_ptr, next); 1585 return tb; 1586 } 1587 1588 void tcg_prologue_init(void) 1589 { 1590 TCGContext *s = tcg_ctx; 1591 size_t prologue_size; 1592 1593 s->code_ptr = s->code_gen_ptr; 1594 s->code_buf = s->code_gen_ptr; 1595 s->data_gen_ptr = NULL; 1596 1597 #ifndef CONFIG_TCG_INTERPRETER 1598 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1599 #endif 1600 1601 s->pool_labels = NULL; 1602 1603 qemu_thread_jit_write(); 1604 /* Generate the prologue. */ 1605 tcg_target_qemu_prologue(s); 1606 1607 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1608 { 1609 int result = tcg_out_pool_finalize(s); 1610 tcg_debug_assert(result == 0); 1611 } 1612 1613 prologue_size = tcg_current_code_size(s); 1614 perf_report_prologue(s->code_gen_ptr, prologue_size); 1615 1616 #ifndef CONFIG_TCG_INTERPRETER 1617 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1618 (uintptr_t)s->code_buf, prologue_size); 1619 #endif 1620 1621 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1622 FILE *logfile = qemu_log_trylock(); 1623 if (logfile) { 1624 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1625 if (s->data_gen_ptr) { 1626 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1627 size_t data_size = prologue_size - code_size; 1628 size_t i; 1629 1630 disas(logfile, s->code_gen_ptr, code_size); 1631 1632 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1633 if (sizeof(tcg_target_ulong) == 8) { 1634 fprintf(logfile, 1635 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1636 (uintptr_t)s->data_gen_ptr + i, 1637 *(uint64_t *)(s->data_gen_ptr + i)); 1638 } else { 1639 fprintf(logfile, 1640 "0x%08" PRIxPTR ": .long 0x%08x\n", 1641 (uintptr_t)s->data_gen_ptr + i, 1642 *(uint32_t *)(s->data_gen_ptr + i)); 1643 } 1644 } 1645 } else { 1646 disas(logfile, s->code_gen_ptr, prologue_size); 1647 } 1648 fprintf(logfile, "\n"); 1649 qemu_log_unlock(logfile); 1650 } 1651 } 1652 1653 #ifndef CONFIG_TCG_INTERPRETER 1654 /* 1655 * Assert that goto_ptr is implemented completely, setting an epilogue. 1656 * For tci, we use NULL as the signal to return from the interpreter, 1657 * so skip this check. 1658 */ 1659 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1660 #endif 1661 1662 tcg_region_prologue_set(s); 1663 } 1664 1665 void tcg_func_start(TCGContext *s) 1666 { 1667 tcg_pool_reset(s); 1668 s->nb_temps = s->nb_globals; 1669 1670 /* No temps have been previously allocated for size or locality. */ 1671 tcg_temp_ebb_reset_freed(s); 1672 1673 /* No constant temps have been previously allocated. 
*/ 1674 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1675 if (s->const_table[i]) { 1676 g_hash_table_remove_all(s->const_table[i]); 1677 } 1678 } 1679 1680 s->nb_ops = 0; 1681 s->nb_labels = 0; 1682 s->current_frame_offset = s->frame_start; 1683 1684 #ifdef CONFIG_DEBUG_TCG 1685 s->goto_tb_issue_mask = 0; 1686 #endif 1687 1688 QTAILQ_INIT(&s->ops); 1689 QTAILQ_INIT(&s->free_ops); 1690 s->emit_before_op = NULL; 1691 QSIMPLEQ_INIT(&s->labels); 1692 1693 tcg_debug_assert(s->addr_type == TCG_TYPE_I32 || 1694 s->addr_type == TCG_TYPE_I64); 1695 1696 tcg_debug_assert(s->insn_start_words > 0); 1697 } 1698 1699 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1700 { 1701 int n = s->nb_temps++; 1702 1703 if (n >= TCG_MAX_TEMPS) { 1704 tcg_raise_tb_overflow(s); 1705 } 1706 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1707 } 1708 1709 static TCGTemp *tcg_global_alloc(TCGContext *s) 1710 { 1711 TCGTemp *ts; 1712 1713 tcg_debug_assert(s->nb_globals == s->nb_temps); 1714 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1715 s->nb_globals++; 1716 ts = tcg_temp_alloc(s); 1717 ts->kind = TEMP_GLOBAL; 1718 1719 return ts; 1720 } 1721 1722 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1723 TCGReg reg, const char *name) 1724 { 1725 TCGTemp *ts; 1726 1727 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1728 1729 ts = tcg_global_alloc(s); 1730 ts->base_type = type; 1731 ts->type = type; 1732 ts->kind = TEMP_FIXED; 1733 ts->reg = reg; 1734 ts->name = name; 1735 tcg_regset_set_reg(s->reserved_regs, reg); 1736 1737 return ts; 1738 } 1739 1740 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1741 { 1742 s->frame_start = start; 1743 s->frame_end = start + size; 1744 s->frame_temp 1745 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1746 } 1747 1748 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1749 const char *name, TCGType type) 1750 { 1751 TCGContext *s = tcg_ctx; 1752 TCGTemp *base_ts = tcgv_ptr_temp(base); 1753 TCGTemp *ts = tcg_global_alloc(s); 1754 int indirect_reg = 0; 1755 1756 switch (base_ts->kind) { 1757 case TEMP_FIXED: 1758 break; 1759 case TEMP_GLOBAL: 1760 /* We do not support double-indirect registers. */ 1761 tcg_debug_assert(!base_ts->indirect_reg); 1762 base_ts->indirect_base = 1; 1763 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1764 ? 
2 : 1); 1765 indirect_reg = 1; 1766 break; 1767 default: 1768 g_assert_not_reached(); 1769 } 1770 1771 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1772 TCGTemp *ts2 = tcg_global_alloc(s); 1773 char buf[64]; 1774 1775 ts->base_type = TCG_TYPE_I64; 1776 ts->type = TCG_TYPE_I32; 1777 ts->indirect_reg = indirect_reg; 1778 ts->mem_allocated = 1; 1779 ts->mem_base = base_ts; 1780 ts->mem_offset = offset; 1781 pstrcpy(buf, sizeof(buf), name); 1782 pstrcat(buf, sizeof(buf), "_0"); 1783 ts->name = strdup(buf); 1784 1785 tcg_debug_assert(ts2 == ts + 1); 1786 ts2->base_type = TCG_TYPE_I64; 1787 ts2->type = TCG_TYPE_I32; 1788 ts2->indirect_reg = indirect_reg; 1789 ts2->mem_allocated = 1; 1790 ts2->mem_base = base_ts; 1791 ts2->mem_offset = offset + 4; 1792 ts2->temp_subindex = 1; 1793 pstrcpy(buf, sizeof(buf), name); 1794 pstrcat(buf, sizeof(buf), "_1"); 1795 ts2->name = strdup(buf); 1796 } else { 1797 ts->base_type = type; 1798 ts->type = type; 1799 ts->indirect_reg = indirect_reg; 1800 ts->mem_allocated = 1; 1801 ts->mem_base = base_ts; 1802 ts->mem_offset = offset; 1803 ts->name = name; 1804 } 1805 return ts; 1806 } 1807 1808 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1809 { 1810 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1811 return temp_tcgv_i32(ts); 1812 } 1813 1814 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1815 { 1816 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1817 return temp_tcgv_i64(ts); 1818 } 1819 1820 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1821 { 1822 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1823 return temp_tcgv_ptr(ts); 1824 } 1825 1826 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1827 { 1828 TCGContext *s = tcg_ctx; 1829 TCGTemp *ts; 1830 int n; 1831 1832 if (kind == TEMP_EBB) { 1833 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1834 1835 if (idx < TCG_MAX_TEMPS) { 1836 /* There is already an available temp with the right type. 
*/ 1837 clear_bit(idx, s->free_temps[type].l); 1838 1839 ts = &s->temps[idx]; 1840 ts->temp_allocated = 1; 1841 tcg_debug_assert(ts->base_type == type); 1842 tcg_debug_assert(ts->kind == kind); 1843 return ts; 1844 } 1845 } else { 1846 tcg_debug_assert(kind == TEMP_TB); 1847 } 1848 1849 switch (type) { 1850 case TCG_TYPE_I32: 1851 case TCG_TYPE_V64: 1852 case TCG_TYPE_V128: 1853 case TCG_TYPE_V256: 1854 n = 1; 1855 break; 1856 case TCG_TYPE_I64: 1857 n = 64 / TCG_TARGET_REG_BITS; 1858 break; 1859 case TCG_TYPE_I128: 1860 n = 128 / TCG_TARGET_REG_BITS; 1861 break; 1862 default: 1863 g_assert_not_reached(); 1864 } 1865 1866 ts = tcg_temp_alloc(s); 1867 ts->base_type = type; 1868 ts->temp_allocated = 1; 1869 ts->kind = kind; 1870 1871 if (n == 1) { 1872 ts->type = type; 1873 } else { 1874 ts->type = TCG_TYPE_REG; 1875 1876 for (int i = 1; i < n; ++i) { 1877 TCGTemp *ts2 = tcg_temp_alloc(s); 1878 1879 tcg_debug_assert(ts2 == ts + i); 1880 ts2->base_type = type; 1881 ts2->type = TCG_TYPE_REG; 1882 ts2->temp_allocated = 1; 1883 ts2->temp_subindex = i; 1884 ts2->kind = kind; 1885 } 1886 } 1887 return ts; 1888 } 1889 1890 TCGv_i32 tcg_temp_new_i32(void) 1891 { 1892 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1893 } 1894 1895 TCGv_i32 tcg_temp_ebb_new_i32(void) 1896 { 1897 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1898 } 1899 1900 TCGv_i64 tcg_temp_new_i64(void) 1901 { 1902 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 1903 } 1904 1905 TCGv_i64 tcg_temp_ebb_new_i64(void) 1906 { 1907 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 1908 } 1909 1910 TCGv_ptr tcg_temp_new_ptr(void) 1911 { 1912 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 1913 } 1914 1915 TCGv_ptr tcg_temp_ebb_new_ptr(void) 1916 { 1917 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 1918 } 1919 1920 TCGv_i128 tcg_temp_new_i128(void) 1921 { 1922 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 1923 } 1924 1925 TCGv_i128 tcg_temp_ebb_new_i128(void) 1926 { 1927 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 1928 } 1929 1930 TCGv_vec tcg_temp_new_vec(TCGType type) 1931 { 1932 TCGTemp *t; 1933 1934 #ifdef CONFIG_DEBUG_TCG 1935 switch (type) { 1936 case TCG_TYPE_V64: 1937 assert(TCG_TARGET_HAS_v64); 1938 break; 1939 case TCG_TYPE_V128: 1940 assert(TCG_TARGET_HAS_v128); 1941 break; 1942 case TCG_TYPE_V256: 1943 assert(TCG_TARGET_HAS_v256); 1944 break; 1945 default: 1946 g_assert_not_reached(); 1947 } 1948 #endif 1949 1950 t = tcg_temp_new_internal(type, TEMP_EBB); 1951 return temp_tcgv_vec(t); 1952 } 1953 1954 /* Create a new temp of the same type as an existing temp. */ 1955 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1956 { 1957 TCGTemp *t = tcgv_vec_temp(match); 1958 1959 tcg_debug_assert(t->temp_allocated != 0); 1960 1961 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 1962 return temp_tcgv_vec(t); 1963 } 1964 1965 void tcg_temp_free_internal(TCGTemp *ts) 1966 { 1967 TCGContext *s = tcg_ctx; 1968 1969 switch (ts->kind) { 1970 case TEMP_CONST: 1971 case TEMP_TB: 1972 /* Silently ignore free. */ 1973 break; 1974 case TEMP_EBB: 1975 tcg_debug_assert(ts->temp_allocated != 0); 1976 ts->temp_allocated = 0; 1977 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 1978 break; 1979 default: 1980 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
*/ 1981 g_assert_not_reached(); 1982 } 1983 } 1984 1985 void tcg_temp_free_i32(TCGv_i32 arg) 1986 { 1987 tcg_temp_free_internal(tcgv_i32_temp(arg)); 1988 } 1989 1990 void tcg_temp_free_i64(TCGv_i64 arg) 1991 { 1992 tcg_temp_free_internal(tcgv_i64_temp(arg)); 1993 } 1994 1995 void tcg_temp_free_i128(TCGv_i128 arg) 1996 { 1997 tcg_temp_free_internal(tcgv_i128_temp(arg)); 1998 } 1999 2000 void tcg_temp_free_ptr(TCGv_ptr arg) 2001 { 2002 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2003 } 2004 2005 void tcg_temp_free_vec(TCGv_vec arg) 2006 { 2007 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2008 } 2009 2010 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2011 { 2012 TCGContext *s = tcg_ctx; 2013 GHashTable *h = s->const_table[type]; 2014 TCGTemp *ts; 2015 2016 if (h == NULL) { 2017 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2018 s->const_table[type] = h; 2019 } 2020 2021 ts = g_hash_table_lookup(h, &val); 2022 if (ts == NULL) { 2023 int64_t *val_ptr; 2024 2025 ts = tcg_temp_alloc(s); 2026 2027 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2028 TCGTemp *ts2 = tcg_temp_alloc(s); 2029 2030 tcg_debug_assert(ts2 == ts + 1); 2031 2032 ts->base_type = TCG_TYPE_I64; 2033 ts->type = TCG_TYPE_I32; 2034 ts->kind = TEMP_CONST; 2035 ts->temp_allocated = 1; 2036 2037 ts2->base_type = TCG_TYPE_I64; 2038 ts2->type = TCG_TYPE_I32; 2039 ts2->kind = TEMP_CONST; 2040 ts2->temp_allocated = 1; 2041 ts2->temp_subindex = 1; 2042 2043 /* 2044 * Retain the full value of the 64-bit constant in the low 2045 * part, so that the hash table works. Actual uses will 2046 * truncate the value to the low part. 2047 */ 2048 ts[HOST_BIG_ENDIAN].val = val; 2049 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2050 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2051 } else { 2052 ts->base_type = type; 2053 ts->type = type; 2054 ts->kind = TEMP_CONST; 2055 ts->temp_allocated = 1; 2056 ts->val = val; 2057 val_ptr = &ts->val; 2058 } 2059 g_hash_table_insert(h, val_ptr, ts); 2060 } 2061 2062 return ts; 2063 } 2064 2065 TCGv_i32 tcg_constant_i32(int32_t val) 2066 { 2067 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2068 } 2069 2070 TCGv_i64 tcg_constant_i64(int64_t val) 2071 { 2072 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2073 } 2074 2075 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2076 { 2077 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2078 } 2079 2080 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2081 { 2082 val = dup_const(vece, val); 2083 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2084 } 2085 2086 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2087 { 2088 TCGTemp *t = tcgv_vec_temp(match); 2089 2090 tcg_debug_assert(t->temp_allocated != 0); 2091 return tcg_constant_vec(t->base_type, vece, val); 2092 } 2093 2094 #ifdef CONFIG_DEBUG_TCG 2095 size_t temp_idx(TCGTemp *ts) 2096 { 2097 ptrdiff_t n = ts - tcg_ctx->temps; 2098 assert(n >= 0 && n < tcg_ctx->nb_temps); 2099 return n; 2100 } 2101 2102 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2103 { 2104 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2105 2106 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2107 assert(o % sizeof(TCGTemp) == 0); 2108 2109 return (void *)tcg_ctx + (uintptr_t)v; 2110 } 2111 #endif /* CONFIG_DEBUG_TCG */ 2112 2113 /* 2114 * Return true if OP may appear in the opcode stream with TYPE. 2115 * Test the runtime variable that controls each opcode. 
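 *
 * A minimal usage sketch (illustrative only, not taken from a caller in
 * this file): an expander that prefers a native opcode, passing no opcode
 * flags, and otherwise falls back to a manual expansion might do
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         ... emit INDEX_op_ctpop_i32 directly ...
 *     } else {
 *         ... expand via shifts, masks and adds ...
 *     }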
2116 */ 2117 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2118 { 2119 bool has_type; 2120 2121 switch (type) { 2122 case TCG_TYPE_I32: 2123 has_type = true; 2124 break; 2125 case TCG_TYPE_I64: 2126 has_type = TCG_TARGET_REG_BITS == 64; 2127 break; 2128 case TCG_TYPE_V64: 2129 has_type = TCG_TARGET_HAS_v64; 2130 break; 2131 case TCG_TYPE_V128: 2132 has_type = TCG_TARGET_HAS_v128; 2133 break; 2134 case TCG_TYPE_V256: 2135 has_type = TCG_TARGET_HAS_v256; 2136 break; 2137 default: 2138 has_type = false; 2139 break; 2140 } 2141 2142 switch (op) { 2143 case INDEX_op_discard: 2144 case INDEX_op_set_label: 2145 case INDEX_op_call: 2146 case INDEX_op_br: 2147 case INDEX_op_mb: 2148 case INDEX_op_insn_start: 2149 case INDEX_op_exit_tb: 2150 case INDEX_op_goto_tb: 2151 case INDEX_op_goto_ptr: 2152 case INDEX_op_qemu_ld_a32_i32: 2153 case INDEX_op_qemu_ld_a64_i32: 2154 case INDEX_op_qemu_st_a32_i32: 2155 case INDEX_op_qemu_st_a64_i32: 2156 case INDEX_op_qemu_ld_a32_i64: 2157 case INDEX_op_qemu_ld_a64_i64: 2158 case INDEX_op_qemu_st_a32_i64: 2159 case INDEX_op_qemu_st_a64_i64: 2160 return true; 2161 2162 case INDEX_op_qemu_st8_a32_i32: 2163 case INDEX_op_qemu_st8_a64_i32: 2164 return TCG_TARGET_HAS_qemu_st8_i32; 2165 2166 case INDEX_op_qemu_ld_a32_i128: 2167 case INDEX_op_qemu_ld_a64_i128: 2168 case INDEX_op_qemu_st_a32_i128: 2169 case INDEX_op_qemu_st_a64_i128: 2170 return TCG_TARGET_HAS_qemu_ldst_i128; 2171 2172 case INDEX_op_mov_i32: 2173 case INDEX_op_setcond_i32: 2174 case INDEX_op_brcond_i32: 2175 case INDEX_op_movcond_i32: 2176 case INDEX_op_ld8u_i32: 2177 case INDEX_op_ld8s_i32: 2178 case INDEX_op_ld16u_i32: 2179 case INDEX_op_ld16s_i32: 2180 case INDEX_op_ld_i32: 2181 case INDEX_op_st8_i32: 2182 case INDEX_op_st16_i32: 2183 case INDEX_op_st_i32: 2184 case INDEX_op_add_i32: 2185 case INDEX_op_sub_i32: 2186 case INDEX_op_neg_i32: 2187 case INDEX_op_mul_i32: 2188 case INDEX_op_and_i32: 2189 case INDEX_op_or_i32: 2190 case INDEX_op_xor_i32: 2191 case INDEX_op_shl_i32: 2192 case INDEX_op_shr_i32: 2193 case INDEX_op_sar_i32: 2194 case INDEX_op_extract_i32: 2195 case INDEX_op_sextract_i32: 2196 case INDEX_op_deposit_i32: 2197 return true; 2198 2199 case INDEX_op_negsetcond_i32: 2200 return TCG_TARGET_HAS_negsetcond_i32; 2201 case INDEX_op_div_i32: 2202 case INDEX_op_divu_i32: 2203 return TCG_TARGET_HAS_div_i32; 2204 case INDEX_op_rem_i32: 2205 case INDEX_op_remu_i32: 2206 return TCG_TARGET_HAS_rem_i32; 2207 case INDEX_op_div2_i32: 2208 case INDEX_op_divu2_i32: 2209 return TCG_TARGET_HAS_div2_i32; 2210 case INDEX_op_rotl_i32: 2211 case INDEX_op_rotr_i32: 2212 return TCG_TARGET_HAS_rot_i32; 2213 case INDEX_op_extract2_i32: 2214 return TCG_TARGET_HAS_extract2_i32; 2215 case INDEX_op_add2_i32: 2216 return TCG_TARGET_HAS_add2_i32; 2217 case INDEX_op_sub2_i32: 2218 return TCG_TARGET_HAS_sub2_i32; 2219 case INDEX_op_mulu2_i32: 2220 return TCG_TARGET_HAS_mulu2_i32; 2221 case INDEX_op_muls2_i32: 2222 return TCG_TARGET_HAS_muls2_i32; 2223 case INDEX_op_muluh_i32: 2224 return TCG_TARGET_HAS_muluh_i32; 2225 case INDEX_op_mulsh_i32: 2226 return TCG_TARGET_HAS_mulsh_i32; 2227 case INDEX_op_ext8s_i32: 2228 return TCG_TARGET_HAS_ext8s_i32; 2229 case INDEX_op_ext16s_i32: 2230 return TCG_TARGET_HAS_ext16s_i32; 2231 case INDEX_op_ext8u_i32: 2232 return TCG_TARGET_HAS_ext8u_i32; 2233 case INDEX_op_ext16u_i32: 2234 return TCG_TARGET_HAS_ext16u_i32; 2235 case INDEX_op_bswap16_i32: 2236 return TCG_TARGET_HAS_bswap16_i32; 2237 case INDEX_op_bswap32_i32: 2238 return TCG_TARGET_HAS_bswap32_i32; 2239 case 
INDEX_op_not_i32: 2240 return TCG_TARGET_HAS_not_i32; 2241 case INDEX_op_andc_i32: 2242 return TCG_TARGET_HAS_andc_i32; 2243 case INDEX_op_orc_i32: 2244 return TCG_TARGET_HAS_orc_i32; 2245 case INDEX_op_eqv_i32: 2246 return TCG_TARGET_HAS_eqv_i32; 2247 case INDEX_op_nand_i32: 2248 return TCG_TARGET_HAS_nand_i32; 2249 case INDEX_op_nor_i32: 2250 return TCG_TARGET_HAS_nor_i32; 2251 case INDEX_op_clz_i32: 2252 return TCG_TARGET_HAS_clz_i32; 2253 case INDEX_op_ctz_i32: 2254 return TCG_TARGET_HAS_ctz_i32; 2255 case INDEX_op_ctpop_i32: 2256 return TCG_TARGET_HAS_ctpop_i32; 2257 2258 case INDEX_op_brcond2_i32: 2259 case INDEX_op_setcond2_i32: 2260 return TCG_TARGET_REG_BITS == 32; 2261 2262 case INDEX_op_mov_i64: 2263 case INDEX_op_setcond_i64: 2264 case INDEX_op_brcond_i64: 2265 case INDEX_op_movcond_i64: 2266 case INDEX_op_ld8u_i64: 2267 case INDEX_op_ld8s_i64: 2268 case INDEX_op_ld16u_i64: 2269 case INDEX_op_ld16s_i64: 2270 case INDEX_op_ld32u_i64: 2271 case INDEX_op_ld32s_i64: 2272 case INDEX_op_ld_i64: 2273 case INDEX_op_st8_i64: 2274 case INDEX_op_st16_i64: 2275 case INDEX_op_st32_i64: 2276 case INDEX_op_st_i64: 2277 case INDEX_op_add_i64: 2278 case INDEX_op_sub_i64: 2279 case INDEX_op_neg_i64: 2280 case INDEX_op_mul_i64: 2281 case INDEX_op_and_i64: 2282 case INDEX_op_or_i64: 2283 case INDEX_op_xor_i64: 2284 case INDEX_op_shl_i64: 2285 case INDEX_op_shr_i64: 2286 case INDEX_op_sar_i64: 2287 case INDEX_op_ext_i32_i64: 2288 case INDEX_op_extu_i32_i64: 2289 case INDEX_op_extract_i64: 2290 case INDEX_op_sextract_i64: 2291 case INDEX_op_deposit_i64: 2292 return TCG_TARGET_REG_BITS == 64; 2293 2294 case INDEX_op_negsetcond_i64: 2295 return TCG_TARGET_HAS_negsetcond_i64; 2296 case INDEX_op_div_i64: 2297 case INDEX_op_divu_i64: 2298 return TCG_TARGET_HAS_div_i64; 2299 case INDEX_op_rem_i64: 2300 case INDEX_op_remu_i64: 2301 return TCG_TARGET_HAS_rem_i64; 2302 case INDEX_op_div2_i64: 2303 case INDEX_op_divu2_i64: 2304 return TCG_TARGET_HAS_div2_i64; 2305 case INDEX_op_rotl_i64: 2306 case INDEX_op_rotr_i64: 2307 return TCG_TARGET_HAS_rot_i64; 2308 case INDEX_op_extract2_i64: 2309 return TCG_TARGET_HAS_extract2_i64; 2310 case INDEX_op_extrl_i64_i32: 2311 case INDEX_op_extrh_i64_i32: 2312 return TCG_TARGET_HAS_extr_i64_i32; 2313 case INDEX_op_ext8s_i64: 2314 return TCG_TARGET_HAS_ext8s_i64; 2315 case INDEX_op_ext16s_i64: 2316 return TCG_TARGET_HAS_ext16s_i64; 2317 case INDEX_op_ext32s_i64: 2318 return TCG_TARGET_HAS_ext32s_i64; 2319 case INDEX_op_ext8u_i64: 2320 return TCG_TARGET_HAS_ext8u_i64; 2321 case INDEX_op_ext16u_i64: 2322 return TCG_TARGET_HAS_ext16u_i64; 2323 case INDEX_op_ext32u_i64: 2324 return TCG_TARGET_HAS_ext32u_i64; 2325 case INDEX_op_bswap16_i64: 2326 return TCG_TARGET_HAS_bswap16_i64; 2327 case INDEX_op_bswap32_i64: 2328 return TCG_TARGET_HAS_bswap32_i64; 2329 case INDEX_op_bswap64_i64: 2330 return TCG_TARGET_HAS_bswap64_i64; 2331 case INDEX_op_not_i64: 2332 return TCG_TARGET_HAS_not_i64; 2333 case INDEX_op_andc_i64: 2334 return TCG_TARGET_HAS_andc_i64; 2335 case INDEX_op_orc_i64: 2336 return TCG_TARGET_HAS_orc_i64; 2337 case INDEX_op_eqv_i64: 2338 return TCG_TARGET_HAS_eqv_i64; 2339 case INDEX_op_nand_i64: 2340 return TCG_TARGET_HAS_nand_i64; 2341 case INDEX_op_nor_i64: 2342 return TCG_TARGET_HAS_nor_i64; 2343 case INDEX_op_clz_i64: 2344 return TCG_TARGET_HAS_clz_i64; 2345 case INDEX_op_ctz_i64: 2346 return TCG_TARGET_HAS_ctz_i64; 2347 case INDEX_op_ctpop_i64: 2348 return TCG_TARGET_HAS_ctpop_i64; 2349 case INDEX_op_add2_i64: 2350 return TCG_TARGET_HAS_add2_i64; 2351 case 
INDEX_op_sub2_i64: 2352 return TCG_TARGET_HAS_sub2_i64; 2353 case INDEX_op_mulu2_i64: 2354 return TCG_TARGET_HAS_mulu2_i64; 2355 case INDEX_op_muls2_i64: 2356 return TCG_TARGET_HAS_muls2_i64; 2357 case INDEX_op_muluh_i64: 2358 return TCG_TARGET_HAS_muluh_i64; 2359 case INDEX_op_mulsh_i64: 2360 return TCG_TARGET_HAS_mulsh_i64; 2361 2362 case INDEX_op_mov_vec: 2363 case INDEX_op_dup_vec: 2364 case INDEX_op_dupm_vec: 2365 case INDEX_op_ld_vec: 2366 case INDEX_op_st_vec: 2367 case INDEX_op_add_vec: 2368 case INDEX_op_sub_vec: 2369 case INDEX_op_and_vec: 2370 case INDEX_op_or_vec: 2371 case INDEX_op_xor_vec: 2372 case INDEX_op_cmp_vec: 2373 return has_type; 2374 case INDEX_op_dup2_vec: 2375 return has_type && TCG_TARGET_REG_BITS == 32; 2376 case INDEX_op_not_vec: 2377 return has_type && TCG_TARGET_HAS_not_vec; 2378 case INDEX_op_neg_vec: 2379 return has_type && TCG_TARGET_HAS_neg_vec; 2380 case INDEX_op_abs_vec: 2381 return has_type && TCG_TARGET_HAS_abs_vec; 2382 case INDEX_op_andc_vec: 2383 return has_type && TCG_TARGET_HAS_andc_vec; 2384 case INDEX_op_orc_vec: 2385 return has_type && TCG_TARGET_HAS_orc_vec; 2386 case INDEX_op_nand_vec: 2387 return has_type && TCG_TARGET_HAS_nand_vec; 2388 case INDEX_op_nor_vec: 2389 return has_type && TCG_TARGET_HAS_nor_vec; 2390 case INDEX_op_eqv_vec: 2391 return has_type && TCG_TARGET_HAS_eqv_vec; 2392 case INDEX_op_mul_vec: 2393 return has_type && TCG_TARGET_HAS_mul_vec; 2394 case INDEX_op_shli_vec: 2395 case INDEX_op_shri_vec: 2396 case INDEX_op_sari_vec: 2397 return has_type && TCG_TARGET_HAS_shi_vec; 2398 case INDEX_op_shls_vec: 2399 case INDEX_op_shrs_vec: 2400 case INDEX_op_sars_vec: 2401 return has_type && TCG_TARGET_HAS_shs_vec; 2402 case INDEX_op_shlv_vec: 2403 case INDEX_op_shrv_vec: 2404 case INDEX_op_sarv_vec: 2405 return has_type && TCG_TARGET_HAS_shv_vec; 2406 case INDEX_op_rotli_vec: 2407 return has_type && TCG_TARGET_HAS_roti_vec; 2408 case INDEX_op_rotls_vec: 2409 return has_type && TCG_TARGET_HAS_rots_vec; 2410 case INDEX_op_rotlv_vec: 2411 case INDEX_op_rotrv_vec: 2412 return has_type && TCG_TARGET_HAS_rotv_vec; 2413 case INDEX_op_ssadd_vec: 2414 case INDEX_op_usadd_vec: 2415 case INDEX_op_sssub_vec: 2416 case INDEX_op_ussub_vec: 2417 return has_type && TCG_TARGET_HAS_sat_vec; 2418 case INDEX_op_smin_vec: 2419 case INDEX_op_umin_vec: 2420 case INDEX_op_smax_vec: 2421 case INDEX_op_umax_vec: 2422 return has_type && TCG_TARGET_HAS_minmax_vec; 2423 case INDEX_op_bitsel_vec: 2424 return has_type && TCG_TARGET_HAS_bitsel_vec; 2425 case INDEX_op_cmpsel_vec: 2426 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2427 2428 default: 2429 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 2430 return true; 2431 } 2432 } 2433 2434 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2435 { 2436 unsigned width; 2437 2438 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2439 width = (type == TCG_TYPE_I32 ? 
32 : 64); 2440 2441 tcg_debug_assert(ofs < width); 2442 tcg_debug_assert(len > 0); 2443 tcg_debug_assert(len <= width - ofs); 2444 2445 return TCG_TARGET_deposit_valid(type, ofs, len); 2446 } 2447 2448 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2449 2450 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2451 TCGTemp *ret, TCGTemp **args) 2452 { 2453 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2454 int n_extend = 0; 2455 TCGOp *op; 2456 int i, n, pi = 0, total_args; 2457 2458 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2459 init_call_layout(info); 2460 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2461 } 2462 2463 total_args = info->nr_out + info->nr_in + 2; 2464 op = tcg_op_alloc(INDEX_op_call, total_args); 2465 2466 #ifdef CONFIG_PLUGIN 2467 /* Flag helpers that may affect guest state */ 2468 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2469 tcg_ctx->plugin_insn->calls_helpers = true; 2470 } 2471 #endif 2472 2473 TCGOP_CALLO(op) = n = info->nr_out; 2474 switch (n) { 2475 case 0: 2476 tcg_debug_assert(ret == NULL); 2477 break; 2478 case 1: 2479 tcg_debug_assert(ret != NULL); 2480 op->args[pi++] = temp_arg(ret); 2481 break; 2482 case 2: 2483 case 4: 2484 tcg_debug_assert(ret != NULL); 2485 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2486 tcg_debug_assert(ret->temp_subindex == 0); 2487 for (i = 0; i < n; ++i) { 2488 op->args[pi++] = temp_arg(ret + i); 2489 } 2490 break; 2491 default: 2492 g_assert_not_reached(); 2493 } 2494 2495 TCGOP_CALLI(op) = n = info->nr_in; 2496 for (i = 0; i < n; i++) { 2497 const TCGCallArgumentLoc *loc = &info->in[i]; 2498 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2499 2500 switch (loc->kind) { 2501 case TCG_CALL_ARG_NORMAL: 2502 case TCG_CALL_ARG_BY_REF: 2503 case TCG_CALL_ARG_BY_REF_N: 2504 op->args[pi++] = temp_arg(ts); 2505 break; 2506 2507 case TCG_CALL_ARG_EXTEND_U: 2508 case TCG_CALL_ARG_EXTEND_S: 2509 { 2510 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2511 TCGv_i32 orig = temp_tcgv_i32(ts); 2512 2513 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2514 tcg_gen_ext_i32_i64(temp, orig); 2515 } else { 2516 tcg_gen_extu_i32_i64(temp, orig); 2517 } 2518 op->args[pi++] = tcgv_i64_arg(temp); 2519 extend_free[n_extend++] = temp; 2520 } 2521 break; 2522 2523 default: 2524 g_assert_not_reached(); 2525 } 2526 } 2527 op->args[pi++] = (uintptr_t)func; 2528 op->args[pi++] = (uintptr_t)info; 2529 tcg_debug_assert(pi == total_args); 2530 2531 if (tcg_ctx->emit_before_op) { 2532 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2533 } else { 2534 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2535 } 2536 2537 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2538 for (i = 0; i < n_extend; ++i) { 2539 tcg_temp_free_i64(extend_free[i]); 2540 } 2541 } 2542 2543 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2544 { 2545 tcg_gen_callN(func, info, ret, NULL); 2546 } 2547 2548 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2549 { 2550 tcg_gen_callN(func, info, ret, &t1); 2551 } 2552 2553 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2554 TCGTemp *t1, TCGTemp *t2) 2555 { 2556 TCGTemp *args[2] = { t1, t2 }; 2557 tcg_gen_callN(func, info, ret, args); 2558 } 2559 2560 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2561 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2562 { 2563 TCGTemp *args[3] = { t1, t2, t3 }; 2564 tcg_gen_callN(func, info, ret, args); 2565 } 2566 2567 void tcg_gen_call4(void 
*func, TCGHelperInfo *info, TCGTemp *ret, 2568 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2569 { 2570 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2571 tcg_gen_callN(func, info, ret, args); 2572 } 2573 2574 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2575 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2576 { 2577 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2578 tcg_gen_callN(func, info, ret, args); 2579 } 2580 2581 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2582 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2583 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2584 { 2585 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2586 tcg_gen_callN(func, info, ret, args); 2587 } 2588 2589 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2590 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2591 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2592 { 2593 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2594 tcg_gen_callN(func, info, ret, args); 2595 } 2596 2597 static void tcg_reg_alloc_start(TCGContext *s) 2598 { 2599 int i, n; 2600 2601 for (i = 0, n = s->nb_temps; i < n; i++) { 2602 TCGTemp *ts = &s->temps[i]; 2603 TCGTempVal val = TEMP_VAL_MEM; 2604 2605 switch (ts->kind) { 2606 case TEMP_CONST: 2607 val = TEMP_VAL_CONST; 2608 break; 2609 case TEMP_FIXED: 2610 val = TEMP_VAL_REG; 2611 break; 2612 case TEMP_GLOBAL: 2613 break; 2614 case TEMP_EBB: 2615 val = TEMP_VAL_DEAD; 2616 /* fall through */ 2617 case TEMP_TB: 2618 ts->mem_allocated = 0; 2619 break; 2620 default: 2621 g_assert_not_reached(); 2622 } 2623 ts->val_type = val; 2624 } 2625 2626 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2627 } 2628 2629 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2630 TCGTemp *ts) 2631 { 2632 int idx = temp_idx(ts); 2633 2634 switch (ts->kind) { 2635 case TEMP_FIXED: 2636 case TEMP_GLOBAL: 2637 pstrcpy(buf, buf_size, ts->name); 2638 break; 2639 case TEMP_TB: 2640 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2641 break; 2642 case TEMP_EBB: 2643 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2644 break; 2645 case TEMP_CONST: 2646 switch (ts->type) { 2647 case TCG_TYPE_I32: 2648 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2649 break; 2650 #if TCG_TARGET_REG_BITS > 32 2651 case TCG_TYPE_I64: 2652 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2653 break; 2654 #endif 2655 case TCG_TYPE_V64: 2656 case TCG_TYPE_V128: 2657 case TCG_TYPE_V256: 2658 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2659 64 << (ts->type - TCG_TYPE_V64), ts->val); 2660 break; 2661 default: 2662 g_assert_not_reached(); 2663 } 2664 break; 2665 } 2666 return buf; 2667 } 2668 2669 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2670 int buf_size, TCGArg arg) 2671 { 2672 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2673 } 2674 2675 static const char * const cond_name[] = 2676 { 2677 [TCG_COND_NEVER] = "never", 2678 [TCG_COND_ALWAYS] = "always", 2679 [TCG_COND_EQ] = "eq", 2680 [TCG_COND_NE] = "ne", 2681 [TCG_COND_LT] = "lt", 2682 [TCG_COND_GE] = "ge", 2683 [TCG_COND_LE] = "le", 2684 [TCG_COND_GT] = "gt", 2685 [TCG_COND_LTU] = "ltu", 2686 [TCG_COND_GEU] = "geu", 2687 [TCG_COND_LEU] = "leu", 2688 [TCG_COND_GTU] = "gtu", 2689 [TCG_COND_TSTEQ] = "tsteq", 2690 [TCG_COND_TSTNE] = "tstne", 2691 }; 2692 2693 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2694 { 2695 [MO_UB] = "ub", 2696 [MO_SB] = "sb", 2697 [MO_LEUW] = "leuw", 2698 [MO_LESW] = "lesw", 2699 [MO_LEUL] = "leul", 2700 [MO_LESL] = "lesl", 
2701 [MO_LEUQ] = "leq", 2702 [MO_BEUW] = "beuw", 2703 [MO_BESW] = "besw", 2704 [MO_BEUL] = "beul", 2705 [MO_BESL] = "besl", 2706 [MO_BEUQ] = "beq", 2707 [MO_128 + MO_BE] = "beo", 2708 [MO_128 + MO_LE] = "leo", 2709 }; 2710 2711 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2712 [MO_UNALN >> MO_ASHIFT] = "un+", 2713 [MO_ALIGN >> MO_ASHIFT] = "al+", 2714 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2715 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2716 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2717 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2718 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2719 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2720 }; 2721 2722 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2723 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2724 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2725 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2726 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2727 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2728 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2729 }; 2730 2731 static const char bswap_flag_name[][6] = { 2732 [TCG_BSWAP_IZ] = "iz", 2733 [TCG_BSWAP_OZ] = "oz", 2734 [TCG_BSWAP_OS] = "os", 2735 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2736 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2737 }; 2738 2739 #ifdef CONFIG_PLUGIN 2740 static const char * const plugin_from_name[] = { 2741 "from-tb", 2742 "from-insn", 2743 "after-insn", 2744 "after-tb", 2745 }; 2746 #endif 2747 2748 static inline bool tcg_regset_single(TCGRegSet d) 2749 { 2750 return (d & (d - 1)) == 0; 2751 } 2752 2753 static inline TCGReg tcg_regset_first(TCGRegSet d) 2754 { 2755 if (TCG_TARGET_NB_REGS <= 32) { 2756 return ctz32(d); 2757 } else { 2758 return ctz64(d); 2759 } 2760 } 2761 2762 /* Return only the number of characters output -- no error return. */ 2763 #define ne_fprintf(...) \ 2764 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2765 2766 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2767 { 2768 char buf[128]; 2769 TCGOp *op; 2770 2771 QTAILQ_FOREACH(op, &s->ops, link) { 2772 int i, k, nb_oargs, nb_iargs, nb_cargs; 2773 const TCGOpDef *def; 2774 TCGOpcode c; 2775 int col = 0; 2776 2777 c = op->opc; 2778 def = &tcg_op_defs[c]; 2779 2780 if (c == INDEX_op_insn_start) { 2781 nb_oargs = 0; 2782 col += ne_fprintf(f, "\n ----"); 2783 2784 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2785 col += ne_fprintf(f, " %016" PRIx64, 2786 tcg_get_insn_start_param(op, i)); 2787 } 2788 } else if (c == INDEX_op_call) { 2789 const TCGHelperInfo *info = tcg_call_info(op); 2790 void *func = tcg_call_func(op); 2791 2792 /* variable number of arguments */ 2793 nb_oargs = TCGOP_CALLO(op); 2794 nb_iargs = TCGOP_CALLI(op); 2795 nb_cargs = def->nb_cargs; 2796 2797 col += ne_fprintf(f, " %s ", def->name); 2798 2799 /* 2800 * Print the function name from TCGHelperInfo, if available. 2801 * Note that plugins have a template function for the info, 2802 * but the actual function pointer comes from the plugin. 
2803 */ 2804 if (func == info->func) { 2805 col += ne_fprintf(f, "%s", info->name); 2806 } else { 2807 col += ne_fprintf(f, "plugin(%p)", func); 2808 } 2809 2810 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2811 for (i = 0; i < nb_oargs; i++) { 2812 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2813 op->args[i])); 2814 } 2815 for (i = 0; i < nb_iargs; i++) { 2816 TCGArg arg = op->args[nb_oargs + i]; 2817 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2818 col += ne_fprintf(f, ",%s", t); 2819 } 2820 } else { 2821 col += ne_fprintf(f, " %s ", def->name); 2822 2823 nb_oargs = def->nb_oargs; 2824 nb_iargs = def->nb_iargs; 2825 nb_cargs = def->nb_cargs; 2826 2827 if (def->flags & TCG_OPF_VECTOR) { 2828 col += ne_fprintf(f, "v%d,e%d,", 2829 8 * tcg_type_size(TCGOP_TYPE(op)), 2830 8 << TCGOP_VECE(op)); 2831 } 2832 2833 k = 0; 2834 for (i = 0; i < nb_oargs; i++) { 2835 const char *sep = k ? "," : ""; 2836 col += ne_fprintf(f, "%s%s", sep, 2837 tcg_get_arg_str(s, buf, sizeof(buf), 2838 op->args[k++])); 2839 } 2840 for (i = 0; i < nb_iargs; i++) { 2841 const char *sep = k ? "," : ""; 2842 col += ne_fprintf(f, "%s%s", sep, 2843 tcg_get_arg_str(s, buf, sizeof(buf), 2844 op->args[k++])); 2845 } 2846 switch (c) { 2847 case INDEX_op_brcond_i32: 2848 case INDEX_op_setcond_i32: 2849 case INDEX_op_negsetcond_i32: 2850 case INDEX_op_movcond_i32: 2851 case INDEX_op_brcond2_i32: 2852 case INDEX_op_setcond2_i32: 2853 case INDEX_op_brcond_i64: 2854 case INDEX_op_setcond_i64: 2855 case INDEX_op_negsetcond_i64: 2856 case INDEX_op_movcond_i64: 2857 case INDEX_op_cmp_vec: 2858 case INDEX_op_cmpsel_vec: 2859 if (op->args[k] < ARRAY_SIZE(cond_name) 2860 && cond_name[op->args[k]]) { 2861 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2862 } else { 2863 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2864 } 2865 i = 1; 2866 break; 2867 case INDEX_op_qemu_ld_a32_i32: 2868 case INDEX_op_qemu_ld_a64_i32: 2869 case INDEX_op_qemu_st_a32_i32: 2870 case INDEX_op_qemu_st_a64_i32: 2871 case INDEX_op_qemu_st8_a32_i32: 2872 case INDEX_op_qemu_st8_a64_i32: 2873 case INDEX_op_qemu_ld_a32_i64: 2874 case INDEX_op_qemu_ld_a64_i64: 2875 case INDEX_op_qemu_st_a32_i64: 2876 case INDEX_op_qemu_st_a64_i64: 2877 case INDEX_op_qemu_ld_a32_i128: 2878 case INDEX_op_qemu_ld_a64_i128: 2879 case INDEX_op_qemu_st_a32_i128: 2880 case INDEX_op_qemu_st_a64_i128: 2881 { 2882 const char *s_al, *s_op, *s_at; 2883 MemOpIdx oi = op->args[k++]; 2884 MemOp mop = get_memop(oi); 2885 unsigned ix = get_mmuidx(oi); 2886 2887 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2888 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2889 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2890 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2891 2892 /* If all fields are accounted for, print symbolically. 
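 * For example (purely illustrative), a little-endian 32-bit load with
 * 4-byte alignment, default atomicity and mmu index 1 comes out as
 * ",al4+leul,1": atom name, alignment name and size/endian name
 * concatenated, followed by the mmu index.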
*/ 2893 if (!mop && s_al && s_op && s_at) { 2894 col += ne_fprintf(f, ",%s%s%s,%u", 2895 s_at, s_al, s_op, ix); 2896 } else { 2897 mop = get_memop(oi); 2898 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2899 } 2900 i = 1; 2901 } 2902 break; 2903 case INDEX_op_bswap16_i32: 2904 case INDEX_op_bswap16_i64: 2905 case INDEX_op_bswap32_i32: 2906 case INDEX_op_bswap32_i64: 2907 case INDEX_op_bswap64_i64: 2908 { 2909 TCGArg flags = op->args[k]; 2910 const char *name = NULL; 2911 2912 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2913 name = bswap_flag_name[flags]; 2914 } 2915 if (name) { 2916 col += ne_fprintf(f, ",%s", name); 2917 } else { 2918 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2919 } 2920 i = k = 1; 2921 } 2922 break; 2923 #ifdef CONFIG_PLUGIN 2924 case INDEX_op_plugin_cb: 2925 { 2926 TCGArg from = op->args[k++]; 2927 const char *name = NULL; 2928 2929 if (from < ARRAY_SIZE(plugin_from_name)) { 2930 name = plugin_from_name[from]; 2931 } 2932 if (name) { 2933 col += ne_fprintf(f, "%s", name); 2934 } else { 2935 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2936 } 2937 i = 1; 2938 } 2939 break; 2940 #endif 2941 default: 2942 i = 0; 2943 break; 2944 } 2945 switch (c) { 2946 case INDEX_op_set_label: 2947 case INDEX_op_br: 2948 case INDEX_op_brcond_i32: 2949 case INDEX_op_brcond_i64: 2950 case INDEX_op_brcond2_i32: 2951 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2952 arg_label(op->args[k])->id); 2953 i++, k++; 2954 break; 2955 case INDEX_op_mb: 2956 { 2957 TCGBar membar = op->args[k]; 2958 const char *b_op, *m_op; 2959 2960 switch (membar & TCG_BAR_SC) { 2961 case 0: 2962 b_op = "none"; 2963 break; 2964 case TCG_BAR_LDAQ: 2965 b_op = "acq"; 2966 break; 2967 case TCG_BAR_STRL: 2968 b_op = "rel"; 2969 break; 2970 case TCG_BAR_SC: 2971 b_op = "seq"; 2972 break; 2973 default: 2974 g_assert_not_reached(); 2975 } 2976 2977 switch (membar & TCG_MO_ALL) { 2978 case 0: 2979 m_op = "none"; 2980 break; 2981 case TCG_MO_LD_LD: 2982 m_op = "rr"; 2983 break; 2984 case TCG_MO_LD_ST: 2985 m_op = "rw"; 2986 break; 2987 case TCG_MO_ST_LD: 2988 m_op = "wr"; 2989 break; 2990 case TCG_MO_ST_ST: 2991 m_op = "ww"; 2992 break; 2993 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2994 m_op = "rr+rw"; 2995 break; 2996 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2997 m_op = "rr+wr"; 2998 break; 2999 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3000 m_op = "rr+ww"; 3001 break; 3002 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3003 m_op = "rw+wr"; 3004 break; 3005 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3006 m_op = "rw+ww"; 3007 break; 3008 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3009 m_op = "wr+ww"; 3010 break; 3011 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3012 m_op = "rr+rw+wr"; 3013 break; 3014 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3015 m_op = "rr+rw+ww"; 3016 break; 3017 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3018 m_op = "rr+wr+ww"; 3019 break; 3020 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3021 m_op = "rw+wr+ww"; 3022 break; 3023 case TCG_MO_ALL: 3024 m_op = "all"; 3025 break; 3026 default: 3027 g_assert_not_reached(); 3028 } 3029 3030 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3031 i++, k++; 3032 } 3033 break; 3034 default: 3035 break; 3036 } 3037 for (; i < nb_cargs; i++, k++) { 3038 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3039 op->args[k]); 3040 } 3041 } 3042 3043 if (have_prefs || op->life) { 3044 for (; col < 40; ++col) { 3045 putc(' ', f); 3046 } 3047 } 3048 3049 if (op->life) { 3050 unsigned life = op->life; 3051 3052 if (life & (SYNC_ARG * 3)) { 3053 ne_fprintf(f, " sync:"); 3054 for (i = 0; i < 2; ++i) { 3055 if (life & (SYNC_ARG << i)) { 3056 ne_fprintf(f, " %d", i); 3057 } 3058 } 3059 } 3060 life /= DEAD_ARG; 3061 if (life) { 3062 ne_fprintf(f, " dead:"); 3063 for (i = 0; life; ++i, life >>= 1) { 3064 if (life & 1) { 3065 ne_fprintf(f, " %d", i); 3066 } 3067 } 3068 } 3069 } 3070 3071 if (have_prefs) { 3072 for (i = 0; i < nb_oargs; ++i) { 3073 TCGRegSet set = output_pref(op, i); 3074 3075 if (i == 0) { 3076 ne_fprintf(f, " pref="); 3077 } else { 3078 ne_fprintf(f, ","); 3079 } 3080 if (set == 0) { 3081 ne_fprintf(f, "none"); 3082 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3083 ne_fprintf(f, "all"); 3084 #ifdef CONFIG_DEBUG_TCG 3085 } else if (tcg_regset_single(set)) { 3086 TCGReg reg = tcg_regset_first(set); 3087 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3088 #endif 3089 } else if (TCG_TARGET_NB_REGS <= 32) { 3090 ne_fprintf(f, "0x%x", (uint32_t)set); 3091 } else { 3092 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3093 } 3094 } 3095 } 3096 3097 putc('\n', f); 3098 } 3099 } 3100 3101 /* we give more priority to constraints with less registers */ 3102 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3103 { 3104 int n; 3105 3106 arg_ct += k; 3107 n = ctpop64(arg_ct->regs); 3108 3109 /* 3110 * Sort constraints of a single register first, which includes output 3111 * aliases (which must exactly match the input already allocated). 3112 */ 3113 if (n == 1 || arg_ct->oalias) { 3114 return INT_MAX; 3115 } 3116 3117 /* 3118 * Sort register pairs next, first then second immediately after. 3119 * Arbitrarily sort multiple pairs by the index of the first reg; 3120 * there shouldn't be many pairs. 3121 */ 3122 switch (arg_ct->pair) { 3123 case 1: 3124 case 3: 3125 return (k + 1) * 2; 3126 case 2: 3127 return (arg_ct->pair_index + 1) * 2 - 1; 3128 } 3129 3130 /* Finally, sort by decreasing register count. 
*/ 3131 assert(n > 1); 3132 return -n; 3133 } 3134 3135 /* sort from highest priority to lowest */ 3136 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3137 { 3138 int i, j; 3139 3140 for (i = 0; i < n; i++) { 3141 a[start + i].sort_index = start + i; 3142 } 3143 if (n <= 1) { 3144 return; 3145 } 3146 for (i = 0; i < n - 1; i++) { 3147 for (j = i + 1; j < n; j++) { 3148 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3149 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3150 if (p1 < p2) { 3151 int tmp = a[start + i].sort_index; 3152 a[start + i].sort_index = a[start + j].sort_index; 3153 a[start + j].sort_index = tmp; 3154 } 3155 } 3156 } 3157 } 3158 3159 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3160 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3161 3162 static void process_constraint_sets(void) 3163 { 3164 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3165 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3166 TCGArgConstraint *args_ct = all_cts[c]; 3167 int nb_oargs = tdefs->nb_oargs; 3168 int nb_iargs = tdefs->nb_iargs; 3169 int nb_args = nb_oargs + nb_iargs; 3170 bool saw_alias_pair = false; 3171 3172 for (int i = 0; i < nb_args; i++) { 3173 const char *ct_str = tdefs->args_ct_str[i]; 3174 bool input_p = i >= nb_oargs; 3175 int o; 3176 3177 switch (*ct_str) { 3178 case '0' ... '9': 3179 o = *ct_str - '0'; 3180 tcg_debug_assert(input_p); 3181 tcg_debug_assert(o < nb_oargs); 3182 tcg_debug_assert(args_ct[o].regs != 0); 3183 tcg_debug_assert(!args_ct[o].oalias); 3184 args_ct[i] = args_ct[o]; 3185 /* The output sets oalias. */ 3186 args_ct[o].oalias = 1; 3187 args_ct[o].alias_index = i; 3188 /* The input sets ialias. */ 3189 args_ct[i].ialias = 1; 3190 args_ct[i].alias_index = o; 3191 if (args_ct[i].pair) { 3192 saw_alias_pair = true; 3193 } 3194 tcg_debug_assert(ct_str[1] == '\0'); 3195 continue; 3196 3197 case '&': 3198 tcg_debug_assert(!input_p); 3199 args_ct[i].newreg = true; 3200 ct_str++; 3201 break; 3202 3203 case 'p': /* plus */ 3204 /* Allocate to the register after the previous. */ 3205 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3206 o = i - 1; 3207 tcg_debug_assert(!args_ct[o].pair); 3208 tcg_debug_assert(!args_ct[o].ct); 3209 args_ct[i] = (TCGArgConstraint){ 3210 .pair = 2, 3211 .pair_index = o, 3212 .regs = args_ct[o].regs << 1, 3213 .newreg = args_ct[o].newreg, 3214 }; 3215 args_ct[o].pair = 1; 3216 args_ct[o].pair_index = i; 3217 tcg_debug_assert(ct_str[1] == '\0'); 3218 continue; 3219 3220 case 'm': /* minus */ 3221 /* Allocate to the register before the previous. */ 3222 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3223 o = i - 1; 3224 tcg_debug_assert(!args_ct[o].pair); 3225 tcg_debug_assert(!args_ct[o].ct); 3226 args_ct[i] = (TCGArgConstraint){ 3227 .pair = 1, 3228 .pair_index = o, 3229 .regs = args_ct[o].regs >> 1, 3230 .newreg = args_ct[o].newreg, 3231 }; 3232 args_ct[o].pair = 2; 3233 args_ct[o].pair_index = i; 3234 tcg_debug_assert(ct_str[1] == '\0'); 3235 continue; 3236 } 3237 3238 do { 3239 switch (*ct_str) { 3240 case 'i': 3241 args_ct[i].ct |= TCG_CT_CONST; 3242 break; 3243 3244 /* Include all of the target-specific constraints. */ 3245 3246 #undef CONST 3247 #define CONST(CASE, MASK) \ 3248 case CASE: args_ct[i].ct |= MASK; break; 3249 #define REGS(CASE, MASK) \ 3250 case CASE: args_ct[i].regs |= MASK; break; 3251 3252 #include "tcg-target-con-str.h" 3253 3254 #undef REGS 3255 #undef CONST 3256 default: 3257 case '0' ... 
'9': 3258 case '&': 3259 case 'p': 3260 case 'm': 3261 /* Typo in TCGConstraintSet constraint. */ 3262 g_assert_not_reached(); 3263 } 3264 } while (*++ct_str != '\0'); 3265 } 3266 3267 /* 3268 * Fix up output pairs that are aliased with inputs. 3269 * When we created the alias, we copied pair from the output. 3270 * There are three cases: 3271 * (1a) Pairs of inputs alias pairs of outputs. 3272 * (1b) One input aliases the first of a pair of outputs. 3273 * (2) One input aliases the second of a pair of outputs. 3274 * 3275 * Case 1a is handled by making sure that the pair_index'es are 3276 * properly updated so that they appear the same as a pair of inputs. 3277 * 3278 * Case 1b is handled by setting the pair_index of the input to 3279 * itself, simply so it doesn't point to an unrelated argument. 3280 * Since we don't encounter the "second" during the input allocation 3281 * phase, nothing happens with the second half of the input pair. 3282 * 3283 * Case 2 is handled by setting the second input to pair=3, the 3284 * first output to pair=3, and the pair_index'es to match. 3285 */ 3286 if (saw_alias_pair) { 3287 for (int i = nb_oargs; i < nb_args; i++) { 3288 int o, o2, i2; 3289 3290 /* 3291 * Since [0-9pm] must be alone in the constraint string, 3292 * the only way they can both be set is if the pair comes 3293 * from the output alias. 3294 */ 3295 if (!args_ct[i].ialias) { 3296 continue; 3297 } 3298 switch (args_ct[i].pair) { 3299 case 0: 3300 break; 3301 case 1: 3302 o = args_ct[i].alias_index; 3303 o2 = args_ct[o].pair_index; 3304 tcg_debug_assert(args_ct[o].pair == 1); 3305 tcg_debug_assert(args_ct[o2].pair == 2); 3306 if (args_ct[o2].oalias) { 3307 /* Case 1a */ 3308 i2 = args_ct[o2].alias_index; 3309 tcg_debug_assert(args_ct[i2].pair == 2); 3310 args_ct[i2].pair_index = i; 3311 args_ct[i].pair_index = i2; 3312 } else { 3313 /* Case 1b */ 3314 args_ct[i].pair_index = i; 3315 } 3316 break; 3317 case 2: 3318 o = args_ct[i].alias_index; 3319 o2 = args_ct[o].pair_index; 3320 tcg_debug_assert(args_ct[o].pair == 2); 3321 tcg_debug_assert(args_ct[o2].pair == 1); 3322 if (args_ct[o2].oalias) { 3323 /* Case 1a */ 3324 i2 = args_ct[o2].alias_index; 3325 tcg_debug_assert(args_ct[i2].pair == 1); 3326 args_ct[i2].pair_index = i; 3327 args_ct[i].pair_index = i2; 3328 } else { 3329 /* Case 2 */ 3330 args_ct[i].pair = 3; 3331 args_ct[o2].pair = 3; 3332 args_ct[i].pair_index = o2; 3333 args_ct[o2].pair_index = i; 3334 } 3335 break; 3336 default: 3337 g_assert_not_reached(); 3338 } 3339 } 3340 } 3341 3342 /* sort the constraints (XXX: this is just an heuristic) */ 3343 sort_constraints(args_ct, 0, nb_oargs); 3344 sort_constraints(args_ct, nb_oargs, nb_iargs); 3345 } 3346 } 3347 3348 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3349 { 3350 const TCGOpDef *def = &tcg_op_defs[op->opc]; 3351 TCGConstraintSetIndex con_set; 3352 3353 #ifdef CONFIG_DEBUG_TCG 3354 assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); 3355 #endif 3356 3357 if (def->flags & TCG_OPF_NOT_PRESENT) { 3358 return empty_cts; 3359 } 3360 3361 con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)); 3362 tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets)); 3363 3364 /* The constraint arguments must match TCGOpcode arguments. 
*/ 3365 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3366 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3367 3368 return all_cts[con_set]; 3369 } 3370 3371 static void remove_label_use(TCGOp *op, int idx) 3372 { 3373 TCGLabel *label = arg_label(op->args[idx]); 3374 TCGLabelUse *use; 3375 3376 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3377 if (use->op == op) { 3378 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3379 return; 3380 } 3381 } 3382 g_assert_not_reached(); 3383 } 3384 3385 void tcg_op_remove(TCGContext *s, TCGOp *op) 3386 { 3387 switch (op->opc) { 3388 case INDEX_op_br: 3389 remove_label_use(op, 0); 3390 break; 3391 case INDEX_op_brcond_i32: 3392 case INDEX_op_brcond_i64: 3393 remove_label_use(op, 3); 3394 break; 3395 case INDEX_op_brcond2_i32: 3396 remove_label_use(op, 5); 3397 break; 3398 default: 3399 break; 3400 } 3401 3402 QTAILQ_REMOVE(&s->ops, op, link); 3403 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3404 s->nb_ops--; 3405 } 3406 3407 void tcg_remove_ops_after(TCGOp *op) 3408 { 3409 TCGContext *s = tcg_ctx; 3410 3411 while (true) { 3412 TCGOp *last = tcg_last_op(); 3413 if (last == op) { 3414 return; 3415 } 3416 tcg_op_remove(s, last); 3417 } 3418 } 3419 3420 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3421 { 3422 TCGContext *s = tcg_ctx; 3423 TCGOp *op = NULL; 3424 3425 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3426 QTAILQ_FOREACH(op, &s->free_ops, link) { 3427 if (nargs <= op->nargs) { 3428 QTAILQ_REMOVE(&s->free_ops, op, link); 3429 nargs = op->nargs; 3430 goto found; 3431 } 3432 } 3433 } 3434 3435 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3436 nargs = MAX(4, nargs); 3437 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3438 3439 found: 3440 memset(op, 0, offsetof(TCGOp, link)); 3441 op->opc = opc; 3442 op->nargs = nargs; 3443 3444 /* Check for bitfield overflow. */ 3445 tcg_debug_assert(op->nargs == nargs); 3446 3447 s->nb_ops++; 3448 return op; 3449 } 3450 3451 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3452 { 3453 TCGOp *op = tcg_op_alloc(opc, nargs); 3454 3455 if (tcg_ctx->emit_before_op) { 3456 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3457 } else { 3458 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3459 } 3460 return op; 3461 } 3462 3463 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3464 TCGOpcode opc, unsigned nargs) 3465 { 3466 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3467 3468 TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); 3469 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3470 return new_op; 3471 } 3472 3473 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3474 TCGOpcode opc, unsigned nargs) 3475 { 3476 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3477 3478 TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); 3479 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3480 return new_op; 3481 } 3482 3483 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3484 { 3485 TCGLabelUse *u; 3486 3487 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3488 TCGOp *op = u->op; 3489 switch (op->opc) { 3490 case INDEX_op_br: 3491 op->args[0] = label_arg(to); 3492 break; 3493 case INDEX_op_brcond_i32: 3494 case INDEX_op_brcond_i64: 3495 op->args[3] = label_arg(to); 3496 break; 3497 case INDEX_op_brcond2_i32: 3498 op->args[5] = label_arg(to); 3499 break; 3500 default: 3501 g_assert_not_reached(); 3502 } 3503 } 3504 3505 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3506 } 3507 3508 /* Reachable analysis : remove unreachable code. 
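 * This is a single forward walk over the op list: `dead' is set after any
 * unconditional control transfer (br, exit_tb, goto_ptr, or a call to a
 * TCG_CALL_NO_RETURN helper) and cleared at the next label that still has
 * branches referencing it; ops encountered while `dead' is set are
 * removed, except insn_start, which is kept for unwind information.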
*/ 3509 static void __attribute__((noinline)) 3510 reachable_code_pass(TCGContext *s) 3511 { 3512 TCGOp *op, *op_next, *op_prev; 3513 bool dead = false; 3514 3515 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3516 bool remove = dead; 3517 TCGLabel *label; 3518 3519 switch (op->opc) { 3520 case INDEX_op_set_label: 3521 label = arg_label(op->args[0]); 3522 3523 /* 3524 * Note that the first op in the TB is always a load, 3525 * so there is always something before a label. 3526 */ 3527 op_prev = QTAILQ_PREV(op, link); 3528 3529 /* 3530 * If we find two sequential labels, move all branches to 3531 * reference the second label and remove the first label. 3532 * Do this before branch to next optimization, so that the 3533 * middle label is out of the way. 3534 */ 3535 if (op_prev->opc == INDEX_op_set_label) { 3536 move_label_uses(label, arg_label(op_prev->args[0])); 3537 tcg_op_remove(s, op_prev); 3538 op_prev = QTAILQ_PREV(op, link); 3539 } 3540 3541 /* 3542 * Optimization can fold conditional branches to unconditional. 3543 * If we find a label which is preceded by an unconditional 3544 * branch to next, remove the branch. We couldn't do this when 3545 * processing the branch because any dead code between the branch 3546 * and label had not yet been removed. 3547 */ 3548 if (op_prev->opc == INDEX_op_br && 3549 label == arg_label(op_prev->args[0])) { 3550 tcg_op_remove(s, op_prev); 3551 /* Fall through means insns become live again. */ 3552 dead = false; 3553 } 3554 3555 if (QSIMPLEQ_EMPTY(&label->branches)) { 3556 /* 3557 * While there is an occasional backward branch, virtually 3558 * all branches generated by the translators are forward. 3559 * Which means that generally we will have already removed 3560 * all references to the label that will be, and there is 3561 * little to be gained by iterating. 3562 */ 3563 remove = true; 3564 } else { 3565 /* Once we see a label, insns become live again. */ 3566 dead = false; 3567 remove = false; 3568 } 3569 break; 3570 3571 case INDEX_op_br: 3572 case INDEX_op_exit_tb: 3573 case INDEX_op_goto_ptr: 3574 /* Unconditional branches; everything following is dead. */ 3575 dead = true; 3576 break; 3577 3578 case INDEX_op_call: 3579 /* Notice noreturn helper calls, raising exceptions. */ 3580 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3581 dead = true; 3582 } 3583 break; 3584 3585 case INDEX_op_insn_start: 3586 /* Never remove -- we need to keep these for unwind. */ 3587 remove = false; 3588 break; 3589 3590 default: 3591 break; 3592 } 3593 3594 if (remove) { 3595 tcg_op_remove(s, op); 3596 } 3597 } 3598 } 3599 3600 #define TS_DEAD 1 3601 #define TS_MEM 2 3602 3603 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3604 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3605 3606 /* For liveness_pass_1, the register preferences for a given temp. */ 3607 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3608 { 3609 return ts->state_ptr; 3610 } 3611 3612 /* For liveness_pass_1, reset the preferences for a given temp to the 3613 * maximal regset for its type. 3614 */ 3615 static inline void la_reset_pref(TCGTemp *ts) 3616 { 3617 *la_temp_pref(ts) 3618 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3619 } 3620 3621 /* liveness analysis: end of function: all temps are dead, and globals 3622 should be in memory. 
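 * In terms of the state bits above, globals are set to TS_DEAD | TS_MEM
 * (no live register copy, canonical memory slot valid), while all other
 * temporaries are set to plain TS_DEAD.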
*/ 3623 static void la_func_end(TCGContext *s, int ng, int nt) 3624 { 3625 int i; 3626 3627 for (i = 0; i < ng; ++i) { 3628 s->temps[i].state = TS_DEAD | TS_MEM; 3629 la_reset_pref(&s->temps[i]); 3630 } 3631 for (i = ng; i < nt; ++i) { 3632 s->temps[i].state = TS_DEAD; 3633 la_reset_pref(&s->temps[i]); 3634 } 3635 } 3636 3637 /* liveness analysis: end of basic block: all temps are dead, globals 3638 and local temps should be in memory. */ 3639 static void la_bb_end(TCGContext *s, int ng, int nt) 3640 { 3641 int i; 3642 3643 for (i = 0; i < nt; ++i) { 3644 TCGTemp *ts = &s->temps[i]; 3645 int state; 3646 3647 switch (ts->kind) { 3648 case TEMP_FIXED: 3649 case TEMP_GLOBAL: 3650 case TEMP_TB: 3651 state = TS_DEAD | TS_MEM; 3652 break; 3653 case TEMP_EBB: 3654 case TEMP_CONST: 3655 state = TS_DEAD; 3656 break; 3657 default: 3658 g_assert_not_reached(); 3659 } 3660 ts->state = state; 3661 la_reset_pref(ts); 3662 } 3663 } 3664 3665 /* liveness analysis: sync globals back to memory. */ 3666 static void la_global_sync(TCGContext *s, int ng) 3667 { 3668 int i; 3669 3670 for (i = 0; i < ng; ++i) { 3671 int state = s->temps[i].state; 3672 s->temps[i].state = state | TS_MEM; 3673 if (state == TS_DEAD) { 3674 /* If the global was previously dead, reset prefs. */ 3675 la_reset_pref(&s->temps[i]); 3676 } 3677 } 3678 } 3679 3680 /* 3681 * liveness analysis: conditional branch: all temps are dead unless 3682 * explicitly live-across-conditional-branch, globals and local temps 3683 * should be synced. 3684 */ 3685 static void la_bb_sync(TCGContext *s, int ng, int nt) 3686 { 3687 la_global_sync(s, ng); 3688 3689 for (int i = ng; i < nt; ++i) { 3690 TCGTemp *ts = &s->temps[i]; 3691 int state; 3692 3693 switch (ts->kind) { 3694 case TEMP_TB: 3695 state = ts->state; 3696 ts->state = state | TS_MEM; 3697 if (state != TS_DEAD) { 3698 continue; 3699 } 3700 break; 3701 case TEMP_EBB: 3702 case TEMP_CONST: 3703 continue; 3704 default: 3705 g_assert_not_reached(); 3706 } 3707 la_reset_pref(&s->temps[i]); 3708 } 3709 } 3710 3711 /* liveness analysis: sync globals back to memory and kill. */ 3712 static void la_global_kill(TCGContext *s, int ng) 3713 { 3714 int i; 3715 3716 for (i = 0; i < ng; i++) { 3717 s->temps[i].state = TS_DEAD | TS_MEM; 3718 la_reset_pref(&s->temps[i]); 3719 } 3720 } 3721 3722 /* liveness analysis: note live globals crossing calls. */ 3723 static void la_cross_call(TCGContext *s, int nt) 3724 { 3725 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3726 int i; 3727 3728 for (i = 0; i < nt; i++) { 3729 TCGTemp *ts = &s->temps[i]; 3730 if (!(ts->state & TS_DEAD)) { 3731 TCGRegSet *pset = la_temp_pref(ts); 3732 TCGRegSet set = *pset; 3733 3734 set &= mask; 3735 /* If the combination is not possible, restart. */ 3736 if (set == 0) { 3737 set = tcg_target_available_regs[ts->type] & mask; 3738 } 3739 *pset = set; 3740 } 3741 } 3742 } 3743 3744 /* 3745 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3746 * to TEMP_EBB, if possible. 3747 */ 3748 static void __attribute__((noinline)) 3749 liveness_pass_0(TCGContext *s) 3750 { 3751 void * const multiple_ebb = (void *)(uintptr_t)-1; 3752 int nb_temps = s->nb_temps; 3753 TCGOp *op, *ebb; 3754 3755 for (int i = s->nb_globals; i < nb_temps; ++i) { 3756 s->temps[i].state_ptr = NULL; 3757 } 3758 3759 /* 3760 * Represent each EBB by the op at which it begins. In the case of 3761 * the first EBB, this is the first op, otherwise it is a label. 
3762 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3763 * within a single EBB, else MULTIPLE_EBB. 3764 */ 3765 ebb = QTAILQ_FIRST(&s->ops); 3766 QTAILQ_FOREACH(op, &s->ops, link) { 3767 const TCGOpDef *def; 3768 int nb_oargs, nb_iargs; 3769 3770 switch (op->opc) { 3771 case INDEX_op_set_label: 3772 ebb = op; 3773 continue; 3774 case INDEX_op_discard: 3775 continue; 3776 case INDEX_op_call: 3777 nb_oargs = TCGOP_CALLO(op); 3778 nb_iargs = TCGOP_CALLI(op); 3779 break; 3780 default: 3781 def = &tcg_op_defs[op->opc]; 3782 nb_oargs = def->nb_oargs; 3783 nb_iargs = def->nb_iargs; 3784 break; 3785 } 3786 3787 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3788 TCGTemp *ts = arg_temp(op->args[i]); 3789 3790 if (ts->kind != TEMP_TB) { 3791 continue; 3792 } 3793 if (ts->state_ptr == NULL) { 3794 ts->state_ptr = ebb; 3795 } else if (ts->state_ptr != ebb) { 3796 ts->state_ptr = multiple_ebb; 3797 } 3798 } 3799 } 3800 3801 /* 3802 * For TEMP_TB that turned out not to be used beyond one EBB, 3803 * reduce the liveness to TEMP_EBB. 3804 */ 3805 for (int i = s->nb_globals; i < nb_temps; ++i) { 3806 TCGTemp *ts = &s->temps[i]; 3807 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3808 ts->kind = TEMP_EBB; 3809 } 3810 } 3811 } 3812 3813 /* Liveness analysis : update the opc_arg_life array to tell if a 3814 given input arguments is dead. Instructions updating dead 3815 temporaries are removed. */ 3816 static void __attribute__((noinline)) 3817 liveness_pass_1(TCGContext *s) 3818 { 3819 int nb_globals = s->nb_globals; 3820 int nb_temps = s->nb_temps; 3821 TCGOp *op, *op_prev; 3822 TCGRegSet *prefs; 3823 int i; 3824 3825 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3826 for (i = 0; i < nb_temps; ++i) { 3827 s->temps[i].state_ptr = prefs + i; 3828 } 3829 3830 /* ??? Should be redundant with the exit_tb that ends the TB. */ 3831 la_func_end(s, nb_globals, nb_temps); 3832 3833 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3834 int nb_iargs, nb_oargs; 3835 TCGOpcode opc_new, opc_new2; 3836 bool have_opc_new2; 3837 TCGLifeData arg_life = 0; 3838 TCGTemp *ts; 3839 TCGOpcode opc = op->opc; 3840 const TCGOpDef *def = &tcg_op_defs[opc]; 3841 const TCGArgConstraint *args_ct; 3842 3843 switch (opc) { 3844 case INDEX_op_call: 3845 { 3846 const TCGHelperInfo *info = tcg_call_info(op); 3847 int call_flags = tcg_call_flags(op); 3848 3849 nb_oargs = TCGOP_CALLO(op); 3850 nb_iargs = TCGOP_CALLI(op); 3851 3852 /* pure functions can be removed if their result is unused */ 3853 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3854 for (i = 0; i < nb_oargs; i++) { 3855 ts = arg_temp(op->args[i]); 3856 if (ts->state != TS_DEAD) { 3857 goto do_not_remove_call; 3858 } 3859 } 3860 goto do_remove; 3861 } 3862 do_not_remove_call: 3863 3864 /* Output args are dead. */ 3865 for (i = 0; i < nb_oargs; i++) { 3866 ts = arg_temp(op->args[i]); 3867 if (ts->state & TS_DEAD) { 3868 arg_life |= DEAD_ARG << i; 3869 } 3870 if (ts->state & TS_MEM) { 3871 arg_life |= SYNC_ARG << i; 3872 } 3873 ts->state = TS_DEAD; 3874 la_reset_pref(ts); 3875 } 3876 3877 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3878 memset(op->output_pref, 0, sizeof(op->output_pref)); 3879 3880 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3881 TCG_CALL_NO_READ_GLOBALS))) { 3882 la_global_kill(s, nb_globals); 3883 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3884 la_global_sync(s, nb_globals); 3885 } 3886 3887 /* Record arguments that die in this helper. 
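 * The op list is walked in reverse, so TS_DEAD at this point means the
 * value is not used by any later op: this call is its final use.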
*/ 3888 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3889 ts = arg_temp(op->args[i]); 3890 if (ts->state & TS_DEAD) { 3891 arg_life |= DEAD_ARG << i; 3892 } 3893 } 3894 3895 /* For all live registers, remove call-clobbered prefs. */ 3896 la_cross_call(s, nb_temps); 3897 3898 /* 3899 * Input arguments are live for preceding opcodes. 3900 * 3901 * For those arguments that die, and will be allocated in 3902 * registers, clear the register set for that arg, to be 3903 * filled in below. For args that will be on the stack, 3904 * reset to any available reg. Process arguments in reverse 3905 * order so that if a temp is used more than once, the stack 3906 * reset to max happens before the register reset to 0. 3907 */ 3908 for (i = nb_iargs - 1; i >= 0; i--) { 3909 const TCGCallArgumentLoc *loc = &info->in[i]; 3910 ts = arg_temp(op->args[nb_oargs + i]); 3911 3912 if (ts->state & TS_DEAD) { 3913 switch (loc->kind) { 3914 case TCG_CALL_ARG_NORMAL: 3915 case TCG_CALL_ARG_EXTEND_U: 3916 case TCG_CALL_ARG_EXTEND_S: 3917 if (arg_slot_reg_p(loc->arg_slot)) { 3918 *la_temp_pref(ts) = 0; 3919 break; 3920 } 3921 /* fall through */ 3922 default: 3923 *la_temp_pref(ts) = 3924 tcg_target_available_regs[ts->type]; 3925 break; 3926 } 3927 ts->state &= ~TS_DEAD; 3928 } 3929 } 3930 3931 /* 3932 * For each input argument, add its input register to prefs. 3933 * If a temp is used once, this produces a single set bit; 3934 * if a temp is used multiple times, this produces a set. 3935 */ 3936 for (i = 0; i < nb_iargs; i++) { 3937 const TCGCallArgumentLoc *loc = &info->in[i]; 3938 ts = arg_temp(op->args[nb_oargs + i]); 3939 3940 switch (loc->kind) { 3941 case TCG_CALL_ARG_NORMAL: 3942 case TCG_CALL_ARG_EXTEND_U: 3943 case TCG_CALL_ARG_EXTEND_S: 3944 if (arg_slot_reg_p(loc->arg_slot)) { 3945 tcg_regset_set_reg(*la_temp_pref(ts), 3946 tcg_target_call_iarg_regs[loc->arg_slot]); 3947 } 3948 break; 3949 default: 3950 break; 3951 } 3952 } 3953 } 3954 break; 3955 case INDEX_op_insn_start: 3956 break; 3957 case INDEX_op_discard: 3958 /* mark the temporary as dead */ 3959 ts = arg_temp(op->args[0]); 3960 ts->state = TS_DEAD; 3961 la_reset_pref(ts); 3962 break; 3963 3964 case INDEX_op_add2_i32: 3965 opc_new = INDEX_op_add_i32; 3966 goto do_addsub2; 3967 case INDEX_op_sub2_i32: 3968 opc_new = INDEX_op_sub_i32; 3969 goto do_addsub2; 3970 case INDEX_op_add2_i64: 3971 opc_new = INDEX_op_add_i64; 3972 goto do_addsub2; 3973 case INDEX_op_sub2_i64: 3974 opc_new = INDEX_op_sub_i64; 3975 do_addsub2: 3976 nb_iargs = 4; 3977 nb_oargs = 2; 3978 /* Test if the high part of the operation is dead, but not 3979 the low part. The result can be optimized to a simple 3980 add or sub. This happens often for x86_64 guest when the 3981 cpu mode is set to 32 bit. */ 3982 if (arg_temp(op->args[1])->state == TS_DEAD) { 3983 if (arg_temp(op->args[0])->state == TS_DEAD) { 3984 goto do_remove; 3985 } 3986 /* Replace the opcode and adjust the args in place, 3987 leaving 3 unused args at the end. */ 3988 op->opc = opc = opc_new; 3989 op->args[1] = op->args[2]; 3990 op->args[2] = op->args[4]; 3991 /* Fall through and mark the single-word operation live. 
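 * E.g. an add2_i32 whose high-part result is unused becomes a plain
 * add_i32 of the low halves: args[0] keeps the low result while
 * args[1] and args[2] now hold the two low-part inputs copied above.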
*/ 3992 nb_iargs = 2; 3993 nb_oargs = 1; 3994 } 3995 goto do_not_remove; 3996 3997 case INDEX_op_mulu2_i32: 3998 opc_new = INDEX_op_mul_i32; 3999 opc_new2 = INDEX_op_muluh_i32; 4000 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 4001 goto do_mul2; 4002 case INDEX_op_muls2_i32: 4003 opc_new = INDEX_op_mul_i32; 4004 opc_new2 = INDEX_op_mulsh_i32; 4005 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 4006 goto do_mul2; 4007 case INDEX_op_mulu2_i64: 4008 opc_new = INDEX_op_mul_i64; 4009 opc_new2 = INDEX_op_muluh_i64; 4010 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 4011 goto do_mul2; 4012 case INDEX_op_muls2_i64: 4013 opc_new = INDEX_op_mul_i64; 4014 opc_new2 = INDEX_op_mulsh_i64; 4015 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 4016 goto do_mul2; 4017 do_mul2: 4018 nb_iargs = 2; 4019 nb_oargs = 2; 4020 if (arg_temp(op->args[1])->state == TS_DEAD) { 4021 if (arg_temp(op->args[0])->state == TS_DEAD) { 4022 /* Both parts of the operation are dead. */ 4023 goto do_remove; 4024 } 4025 /* The high part of the operation is dead; generate the low. */ 4026 op->opc = opc = opc_new; 4027 op->args[1] = op->args[2]; 4028 op->args[2] = op->args[3]; 4029 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 4030 /* The low part of the operation is dead; generate the high. */ 4031 op->opc = opc = opc_new2; 4032 op->args[0] = op->args[1]; 4033 op->args[1] = op->args[2]; 4034 op->args[2] = op->args[3]; 4035 } else { 4036 goto do_not_remove; 4037 } 4038 /* Mark the single-word operation live. */ 4039 nb_oargs = 1; 4040 goto do_not_remove; 4041 4042 default: 4043 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4044 nb_iargs = def->nb_iargs; 4045 nb_oargs = def->nb_oargs; 4046 4047 /* Test if the operation can be removed because all 4048 its outputs are dead. We assume that nb_oargs == 0 4049 implies side effects */ 4050 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4051 for (i = 0; i < nb_oargs; i++) { 4052 if (arg_temp(op->args[i])->state != TS_DEAD) { 4053 goto do_not_remove; 4054 } 4055 } 4056 goto do_remove; 4057 } 4058 goto do_not_remove; 4059 4060 do_remove: 4061 tcg_op_remove(s, op); 4062 break; 4063 4064 do_not_remove: 4065 for (i = 0; i < nb_oargs; i++) { 4066 ts = arg_temp(op->args[i]); 4067 4068 /* Remember the preference of the uses that followed. */ 4069 if (i < ARRAY_SIZE(op->output_pref)) { 4070 op->output_pref[i] = *la_temp_pref(ts); 4071 } 4072 4073 /* Output args are dead. */ 4074 if (ts->state & TS_DEAD) { 4075 arg_life |= DEAD_ARG << i; 4076 } 4077 if (ts->state & TS_MEM) { 4078 arg_life |= SYNC_ARG << i; 4079 } 4080 ts->state = TS_DEAD; 4081 la_reset_pref(ts); 4082 } 4083 4084 /* If end of basic block, update. */ 4085 if (def->flags & TCG_OPF_BB_EXIT) { 4086 la_func_end(s, nb_globals, nb_temps); 4087 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4088 la_bb_sync(s, nb_globals, nb_temps); 4089 } else if (def->flags & TCG_OPF_BB_END) { 4090 la_bb_end(s, nb_globals, nb_temps); 4091 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4092 la_global_sync(s, nb_globals); 4093 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4094 la_cross_call(s, nb_temps); 4095 } 4096 } 4097 4098 /* Record arguments that die in this opcode. */ 4099 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4100 ts = arg_temp(op->args[i]); 4101 if (ts->state & TS_DEAD) { 4102 arg_life |= DEAD_ARG << i; 4103 } 4104 } 4105 4106 /* Input arguments are live for preceding opcodes. 
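 * Clearing TS_DEAD below makes the value live for the ops still to be
 * processed, which textually precede this one in the reverse walk.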
*/ 4107 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4108 ts = arg_temp(op->args[i]); 4109 if (ts->state & TS_DEAD) { 4110 /* For operands that were dead, initially allow 4111 all regs for the type. */ 4112 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4113 ts->state &= ~TS_DEAD; 4114 } 4115 } 4116 4117 /* Incorporate constraints for this operand. */ 4118 switch (opc) { 4119 case INDEX_op_mov_i32: 4120 case INDEX_op_mov_i64: 4121 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4122 have proper constraints. That said, special case 4123 moves to propagate preferences backward. */ 4124 if (IS_DEAD_ARG(1)) { 4125 *la_temp_pref(arg_temp(op->args[0])) 4126 = *la_temp_pref(arg_temp(op->args[1])); 4127 } 4128 break; 4129 4130 default: 4131 args_ct = opcode_args_ct(op); 4132 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4133 const TCGArgConstraint *ct = &args_ct[i]; 4134 TCGRegSet set, *pset; 4135 4136 ts = arg_temp(op->args[i]); 4137 pset = la_temp_pref(ts); 4138 set = *pset; 4139 4140 set &= ct->regs; 4141 if (ct->ialias) { 4142 set &= output_pref(op, ct->alias_index); 4143 } 4144 /* If the combination is not possible, restart. */ 4145 if (set == 0) { 4146 set = ct->regs; 4147 } 4148 *pset = set; 4149 } 4150 break; 4151 } 4152 break; 4153 } 4154 op->life = arg_life; 4155 } 4156 } 4157 4158 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4159 static bool __attribute__((noinline)) 4160 liveness_pass_2(TCGContext *s) 4161 { 4162 int nb_globals = s->nb_globals; 4163 int nb_temps, i; 4164 bool changes = false; 4165 TCGOp *op, *op_next; 4166 4167 /* Create a temporary for each indirect global. */ 4168 for (i = 0; i < nb_globals; ++i) { 4169 TCGTemp *its = &s->temps[i]; 4170 if (its->indirect_reg) { 4171 TCGTemp *dts = tcg_temp_alloc(s); 4172 dts->type = its->type; 4173 dts->base_type = its->base_type; 4174 dts->temp_subindex = its->temp_subindex; 4175 dts->kind = TEMP_EBB; 4176 its->state_ptr = dts; 4177 } else { 4178 its->state_ptr = NULL; 4179 } 4180 /* All globals begin dead. */ 4181 its->state = TS_DEAD; 4182 } 4183 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4184 TCGTemp *its = &s->temps[i]; 4185 its->state_ptr = NULL; 4186 its->state = TS_DEAD; 4187 } 4188 4189 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4190 TCGOpcode opc = op->opc; 4191 const TCGOpDef *def = &tcg_op_defs[opc]; 4192 TCGLifeData arg_life = op->life; 4193 int nb_iargs, nb_oargs, call_flags; 4194 TCGTemp *arg_ts, *dir_ts; 4195 4196 if (opc == INDEX_op_call) { 4197 nb_oargs = TCGOP_CALLO(op); 4198 nb_iargs = TCGOP_CALLI(op); 4199 call_flags = tcg_call_flags(op); 4200 } else { 4201 nb_iargs = def->nb_iargs; 4202 nb_oargs = def->nb_oargs; 4203 4204 /* Set flags similar to how calls require. */ 4205 if (def->flags & TCG_OPF_COND_BRANCH) { 4206 /* Like reading globals: sync_globals */ 4207 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4208 } else if (def->flags & TCG_OPF_BB_END) { 4209 /* Like writing globals: save_globals */ 4210 call_flags = 0; 4211 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4212 /* Like reading globals: sync_globals */ 4213 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4214 } else { 4215 /* No effect on globals. */ 4216 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4217 TCG_CALL_NO_WRITE_GLOBALS); 4218 } 4219 } 4220 4221 /* Make sure that input arguments are available. 
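   For an indirect global whose direct temp is currently dead, insert a load from its canonical memory slot just before this opcode; the temp is then marked TS_MEM, i.e. loaded but still in sync with memory.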
*/ 4222 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4223 arg_ts = arg_temp(op->args[i]); 4224 dir_ts = arg_ts->state_ptr; 4225 if (dir_ts && arg_ts->state == TS_DEAD) { 4226 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4227 ? INDEX_op_ld_i32 4228 : INDEX_op_ld_i64); 4229 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 4230 4231 lop->args[0] = temp_arg(dir_ts); 4232 lop->args[1] = temp_arg(arg_ts->mem_base); 4233 lop->args[2] = arg_ts->mem_offset; 4234 4235 /* Loaded, but synced with memory. */ 4236 arg_ts->state = TS_MEM; 4237 } 4238 } 4239 4240 /* Perform input replacement, and mark inputs that became dead. 4241 No action is required except keeping temp_state up to date 4242 so that we reload when needed. */ 4243 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4244 arg_ts = arg_temp(op->args[i]); 4245 dir_ts = arg_ts->state_ptr; 4246 if (dir_ts) { 4247 op->args[i] = temp_arg(dir_ts); 4248 changes = true; 4249 if (IS_DEAD_ARG(i)) { 4250 arg_ts->state = TS_DEAD; 4251 } 4252 } 4253 } 4254 4255 /* Liveness analysis should ensure that the following are 4256 all correct, for call sites and basic block end points. */ 4257 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4258 /* Nothing to do */ 4259 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4260 for (i = 0; i < nb_globals; ++i) { 4261 /* Liveness should see that globals are synced back, 4262 that is, either TS_DEAD or TS_MEM. */ 4263 arg_ts = &s->temps[i]; 4264 tcg_debug_assert(arg_ts->state_ptr == 0 4265 || arg_ts->state != 0); 4266 } 4267 } else { 4268 for (i = 0; i < nb_globals; ++i) { 4269 /* Liveness should see that globals are saved back, 4270 that is, TS_DEAD, waiting to be reloaded. */ 4271 arg_ts = &s->temps[i]; 4272 tcg_debug_assert(arg_ts->state_ptr == 0 4273 || arg_ts->state == TS_DEAD); 4274 } 4275 } 4276 4277 /* Outputs become available. */ 4278 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 4279 arg_ts = arg_temp(op->args[0]); 4280 dir_ts = arg_ts->state_ptr; 4281 if (dir_ts) { 4282 op->args[0] = temp_arg(dir_ts); 4283 changes = true; 4284 4285 /* The output is now live and modified. */ 4286 arg_ts->state = 0; 4287 4288 if (NEED_SYNC_ARG(0)) { 4289 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4290 ? INDEX_op_st_i32 4291 : INDEX_op_st_i64); 4292 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 4293 TCGTemp *out_ts = dir_ts; 4294 4295 if (IS_DEAD_ARG(0)) { 4296 out_ts = arg_temp(op->args[1]); 4297 arg_ts->state = TS_DEAD; 4298 tcg_op_remove(s, op); 4299 } else { 4300 arg_ts->state = TS_MEM; 4301 } 4302 4303 sop->args[0] = temp_arg(out_ts); 4304 sop->args[1] = temp_arg(arg_ts->mem_base); 4305 sop->args[2] = arg_ts->mem_offset; 4306 } else { 4307 tcg_debug_assert(!IS_DEAD_ARG(0)); 4308 } 4309 } 4310 } else { 4311 for (i = 0; i < nb_oargs; i++) { 4312 arg_ts = arg_temp(op->args[i]); 4313 dir_ts = arg_ts->state_ptr; 4314 if (!dir_ts) { 4315 continue; 4316 } 4317 op->args[i] = temp_arg(dir_ts); 4318 changes = true; 4319 4320 /* The output is now live and modified. */ 4321 arg_ts->state = 0; 4322 4323 /* Sync outputs upon their last write. */ 4324 if (NEED_SYNC_ARG(i)) { 4325 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4326 ? INDEX_op_st_i32 4327 : INDEX_op_st_i64); 4328 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 4329 4330 sop->args[0] = temp_arg(dir_ts); 4331 sop->args[1] = temp_arg(arg_ts->mem_base); 4332 sop->args[2] = arg_ts->mem_offset; 4333 4334 arg_ts->state = TS_MEM; 4335 } 4336 /* Drop outputs that are dead. 
*/ 4337 if (IS_DEAD_ARG(i)) { 4338 arg_ts->state = TS_DEAD; 4339 } 4340 } 4341 } 4342 } 4343 4344 return changes; 4345 } 4346 4347 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4348 { 4349 intptr_t off; 4350 int size, align; 4351 4352 /* When allocating an object, look at the full type. */ 4353 size = tcg_type_size(ts->base_type); 4354 switch (ts->base_type) { 4355 case TCG_TYPE_I32: 4356 align = 4; 4357 break; 4358 case TCG_TYPE_I64: 4359 case TCG_TYPE_V64: 4360 align = 8; 4361 break; 4362 case TCG_TYPE_I128: 4363 case TCG_TYPE_V128: 4364 case TCG_TYPE_V256: 4365 /* 4366 * Note that we do not require aligned storage for V256, 4367 * and that we provide alignment for I128 to match V128, 4368 * even if that's above what the host ABI requires. 4369 */ 4370 align = 16; 4371 break; 4372 default: 4373 g_assert_not_reached(); 4374 } 4375 4376 /* 4377 * Assume the stack is sufficiently aligned. 4378 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4379 * and do not require 16 byte vector alignment. This seems slightly 4380 * easier than fully parameterizing the above switch statement. 4381 */ 4382 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4383 off = ROUND_UP(s->current_frame_offset, align); 4384 4385 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4386 if (off + size > s->frame_end) { 4387 tcg_raise_tb_overflow(s); 4388 } 4389 s->current_frame_offset = off + size; 4390 #if defined(__sparc__) 4391 off += TCG_TARGET_STACK_BIAS; 4392 #endif 4393 4394 /* If the object was subdivided, assign memory to all the parts. */ 4395 if (ts->base_type != ts->type) { 4396 int part_size = tcg_type_size(ts->type); 4397 int part_count = size / part_size; 4398 4399 /* 4400 * Each part is allocated sequentially in tcg_temp_new_internal. 4401 * Jump back to the first part by subtracting the current index. 4402 */ 4403 ts -= ts->temp_subindex; 4404 for (int i = 0; i < part_count; ++i) { 4405 ts[i].mem_offset = off + i * part_size; 4406 ts[i].mem_base = s->frame_temp; 4407 ts[i].mem_allocated = 1; 4408 } 4409 } else { 4410 ts->mem_offset = off; 4411 ts->mem_base = s->frame_temp; 4412 ts->mem_allocated = 1; 4413 } 4414 } 4415 4416 /* Assign @reg to @ts, and update reg_to_temp[]. */ 4417 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4418 { 4419 if (ts->val_type == TEMP_VAL_REG) { 4420 TCGReg old = ts->reg; 4421 tcg_debug_assert(s->reg_to_temp[old] == ts); 4422 if (old == reg) { 4423 return; 4424 } 4425 s->reg_to_temp[old] = NULL; 4426 } 4427 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4428 s->reg_to_temp[reg] = ts; 4429 ts->val_type = TEMP_VAL_REG; 4430 ts->reg = reg; 4431 } 4432 4433 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4434 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4435 { 4436 tcg_debug_assert(type != TEMP_VAL_REG); 4437 if (ts->val_type == TEMP_VAL_REG) { 4438 TCGReg reg = ts->reg; 4439 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4440 s->reg_to_temp[reg] = NULL; 4441 } 4442 ts->val_type = type; 4443 } 4444 4445 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4446 4447 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4448 mark it free; otherwise mark it dead. 
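   The resulting val_type depends on the kind: TEMP_GLOBAL and TEMP_TB revert to TEMP_VAL_MEM, TEMP_CONST to TEMP_VAL_CONST, and TEMP_EBB to TEMP_VAL_MEM when freed or TEMP_VAL_DEAD when dead; TEMP_FIXED is left untouched.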
*/ 4449 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4450 { 4451 TCGTempVal new_type; 4452 4453 switch (ts->kind) { 4454 case TEMP_FIXED: 4455 return; 4456 case TEMP_GLOBAL: 4457 case TEMP_TB: 4458 new_type = TEMP_VAL_MEM; 4459 break; 4460 case TEMP_EBB: 4461 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4462 break; 4463 case TEMP_CONST: 4464 new_type = TEMP_VAL_CONST; 4465 break; 4466 default: 4467 g_assert_not_reached(); 4468 } 4469 set_temp_val_nonreg(s, ts, new_type); 4470 } 4471 4472 /* Mark a temporary as dead. */ 4473 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4474 { 4475 temp_free_or_dead(s, ts, 1); 4476 } 4477 4478 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4479 register needs to be allocated to store a constant. If 'free_or_dead' 4480 is non-zero, subsequently release the temporary; if it is positive, the 4481 temp is dead; if it is negative, the temp is free. */ 4482 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4483 TCGRegSet preferred_regs, int free_or_dead) 4484 { 4485 if (!temp_readonly(ts) && !ts->mem_coherent) { 4486 if (!ts->mem_allocated) { 4487 temp_allocate_frame(s, ts); 4488 } 4489 switch (ts->val_type) { 4490 case TEMP_VAL_CONST: 4491 /* If we're going to free the temp immediately, then we won't 4492 require it later in a register, so attempt to store the 4493 constant to memory directly. */ 4494 if (free_or_dead 4495 && tcg_out_sti(s, ts->type, ts->val, 4496 ts->mem_base->reg, ts->mem_offset)) { 4497 break; 4498 } 4499 temp_load(s, ts, tcg_target_available_regs[ts->type], 4500 allocated_regs, preferred_regs); 4501 /* fallthrough */ 4502 4503 case TEMP_VAL_REG: 4504 tcg_out_st(s, ts->type, ts->reg, 4505 ts->mem_base->reg, ts->mem_offset); 4506 break; 4507 4508 case TEMP_VAL_MEM: 4509 break; 4510 4511 case TEMP_VAL_DEAD: 4512 default: 4513 g_assert_not_reached(); 4514 } 4515 ts->mem_coherent = 1; 4516 } 4517 if (free_or_dead) { 4518 temp_free_or_dead(s, ts, free_or_dead); 4519 } 4520 } 4521 4522 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4523 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4524 { 4525 TCGTemp *ts = s->reg_to_temp[reg]; 4526 if (ts != NULL) { 4527 temp_sync(s, ts, allocated_regs, 0, -1); 4528 } 4529 } 4530 4531 /** 4532 * tcg_reg_alloc: 4533 * @required_regs: Set of registers in which we must allocate. 4534 * @allocated_regs: Set of registers which must be avoided. 4535 * @preferred_regs: Set of registers we should prefer. 4536 * @rev: True if we search the registers in "indirect" order. 4537 * 4538 * The allocated register must be in @required_regs & ~@allocated_regs, 4539 * but if we can put it in @preferred_regs we may save a move later. 4540 */ 4541 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4542 TCGRegSet allocated_regs, 4543 TCGRegSet preferred_regs, bool rev) 4544 { 4545 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4546 TCGRegSet reg_ct[2]; 4547 const int *order; 4548 4549 reg_ct[1] = required_regs & ~allocated_regs; 4550 tcg_debug_assert(reg_ct[1] != 0); 4551 reg_ct[0] = reg_ct[1] & preferred_regs; 4552 4553 /* Skip the preferred_regs option if it cannot be satisfied, 4554 or if the preference made no difference. */ 4555 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4556 4557 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4558 4559 /* Try free registers, preferences first.
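   reg_ct[0] is the preferred subset and reg_ct[1] the full required set; starting at j = f skips the preferred pass when it is empty or would make no difference.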
*/ 4560 for (j = f; j < 2; j++) { 4561 TCGRegSet set = reg_ct[j]; 4562 4563 if (tcg_regset_single(set)) { 4564 /* One register in the set. */ 4565 TCGReg reg = tcg_regset_first(set); 4566 if (s->reg_to_temp[reg] == NULL) { 4567 return reg; 4568 } 4569 } else { 4570 for (i = 0; i < n; i++) { 4571 TCGReg reg = order[i]; 4572 if (s->reg_to_temp[reg] == NULL && 4573 tcg_regset_test_reg(set, reg)) { 4574 return reg; 4575 } 4576 } 4577 } 4578 } 4579 4580 /* We must spill something. */ 4581 for (j = f; j < 2; j++) { 4582 TCGRegSet set = reg_ct[j]; 4583 4584 if (tcg_regset_single(set)) { 4585 /* One register in the set. */ 4586 TCGReg reg = tcg_regset_first(set); 4587 tcg_reg_free(s, reg, allocated_regs); 4588 return reg; 4589 } else { 4590 for (i = 0; i < n; i++) { 4591 TCGReg reg = order[i]; 4592 if (tcg_regset_test_reg(set, reg)) { 4593 tcg_reg_free(s, reg, allocated_regs); 4594 return reg; 4595 } 4596 } 4597 } 4598 } 4599 4600 g_assert_not_reached(); 4601 } 4602 4603 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4604 TCGRegSet allocated_regs, 4605 TCGRegSet preferred_regs, bool rev) 4606 { 4607 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4608 TCGRegSet reg_ct[2]; 4609 const int *order; 4610 4611 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4612 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4613 tcg_debug_assert(reg_ct[1] != 0); 4614 reg_ct[0] = reg_ct[1] & preferred_regs; 4615 4616 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4617 4618 /* 4619 * Skip the preferred_regs option if it cannot be satisfied, 4620 * or if the preference made no difference. 4621 */ 4622 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4623 4624 /* 4625 * Minimize the number of flushes by looking for 2 free registers first, 4626 * then a single flush, then two flushes. 4627 */ 4628 for (fmin = 2; fmin >= 0; fmin--) { 4629 for (j = k; j < 2; j++) { 4630 TCGRegSet set = reg_ct[j]; 4631 4632 for (i = 0; i < n; i++) { 4633 TCGReg reg = order[i]; 4634 4635 if (tcg_regset_test_reg(set, reg)) { 4636 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4637 if (f >= fmin) { 4638 tcg_reg_free(s, reg, allocated_regs); 4639 tcg_reg_free(s, reg + 1, allocated_regs); 4640 return reg; 4641 } 4642 } 4643 } 4644 } 4645 } 4646 g_assert_not_reached(); 4647 } 4648 4649 /* Make sure the temporary is in a register. If needed, allocate the register 4650 from DESIRED while avoiding ALLOCATED. */ 4651 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4652 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4653 { 4654 TCGReg reg; 4655 4656 switch (ts->val_type) { 4657 case TEMP_VAL_REG: 4658 return; 4659 case TEMP_VAL_CONST: 4660 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4661 preferred_regs, ts->indirect_base); 4662 if (ts->type <= TCG_TYPE_I64) { 4663 tcg_out_movi(s, ts->type, reg, ts->val); 4664 } else { 4665 uint64_t val = ts->val; 4666 MemOp vece = MO_64; 4667 4668 /* 4669 * Find the minimal vector element that matches the constant. 4670 * The targets will, in general, have to do this search anyway, 4671 * do this generically. 
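 * For example, 0x2020202020202020 is a dup of the MO_8 constant 0x20, so the backend can be asked for its narrowest dup form.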
4672 */ 4673 if (val == dup_const(MO_8, val)) { 4674 vece = MO_8; 4675 } else if (val == dup_const(MO_16, val)) { 4676 vece = MO_16; 4677 } else if (val == dup_const(MO_32, val)) { 4678 vece = MO_32; 4679 } 4680 4681 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4682 } 4683 ts->mem_coherent = 0; 4684 break; 4685 case TEMP_VAL_MEM: 4686 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4687 preferred_regs, ts->indirect_base); 4688 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4689 ts->mem_coherent = 1; 4690 break; 4691 case TEMP_VAL_DEAD: 4692 default: 4693 g_assert_not_reached(); 4694 } 4695 set_temp_val_reg(s, ts, reg); 4696 } 4697 4698 /* Save a temporary to memory. 'allocated_regs' is used in case a 4699 temporary register needs to be allocated to store a constant. */ 4700 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4701 { 4702 /* The liveness analysis already ensures that globals are back 4703 in memory. Keep a tcg_debug_assert for safety. */ 4704 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4705 } 4706 4707 /* save globals to their canonical location and assume they can be 4708 modified by the following code. 'allocated_regs' is used in case a 4709 temporary register needs to be allocated to store a constant. */ 4710 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4711 { 4712 int i, n; 4713 4714 for (i = 0, n = s->nb_globals; i < n; i++) { 4715 temp_save(s, &s->temps[i], allocated_regs); 4716 } 4717 } 4718 4719 /* sync globals to their canonical location and assume they can be 4720 read by the following code. 'allocated_regs' is used in case a 4721 temporary register needs to be allocated to store a constant. */ 4722 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4723 { 4724 int i, n; 4725 4726 for (i = 0, n = s->nb_globals; i < n; i++) { 4727 TCGTemp *ts = &s->temps[i]; 4728 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4729 || ts->kind == TEMP_FIXED 4730 || ts->mem_coherent); 4731 } 4732 } 4733 4734 /* at the end of a basic block, we assume all temporaries are dead and 4735 all globals are stored at their canonical location. */ 4736 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4737 { 4738 int i; 4739 4740 for (i = s->nb_globals; i < s->nb_temps; i++) { 4741 TCGTemp *ts = &s->temps[i]; 4742 4743 switch (ts->kind) { 4744 case TEMP_TB: 4745 temp_save(s, ts, allocated_regs); 4746 break; 4747 case TEMP_EBB: 4748 /* The liveness analysis already ensures that temps are dead. 4749 Keep a tcg_debug_assert for safety. */ 4750 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4751 break; 4752 case TEMP_CONST: 4753 /* Similarly, we should have freed any allocated register. */ 4754 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4755 break; 4756 default: 4757 g_assert_not_reached(); 4758 } 4759 } 4760 4761 save_globals(s, allocated_regs); 4762 } 4763 4764 /* 4765 * At a conditional branch, we assume all temporaries are dead unless 4766 * explicitly live-across-conditional-branch; all globals and local 4767 * temps are synced to their location. 4768 */ 4769 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4770 { 4771 sync_globals(s, allocated_regs); 4772 4773 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4774 TCGTemp *ts = &s->temps[i]; 4775 /* 4776 * The liveness analysis already ensures that temps are dead. 4777 * Keep tcg_debug_asserts for safety.
4778 */ 4779 switch (ts->kind) { 4780 case TEMP_TB: 4781 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4782 break; 4783 case TEMP_EBB: 4784 case TEMP_CONST: 4785 break; 4786 default: 4787 g_assert_not_reached(); 4788 } 4789 } 4790 } 4791 4792 /* 4793 * Specialized code generation for INDEX_op_mov_* with a constant. 4794 */ 4795 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4796 tcg_target_ulong val, TCGLifeData arg_life, 4797 TCGRegSet preferred_regs) 4798 { 4799 /* ENV should not be modified. */ 4800 tcg_debug_assert(!temp_readonly(ots)); 4801 4802 /* The movi is not explicitly generated here. */ 4803 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4804 ots->val = val; 4805 ots->mem_coherent = 0; 4806 if (NEED_SYNC_ARG(0)) { 4807 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4808 } else if (IS_DEAD_ARG(0)) { 4809 temp_dead(s, ots); 4810 } 4811 } 4812 4813 /* 4814 * Specialized code generation for INDEX_op_mov_*. 4815 */ 4816 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4817 { 4818 const TCGLifeData arg_life = op->life; 4819 TCGRegSet allocated_regs, preferred_regs; 4820 TCGTemp *ts, *ots; 4821 TCGType otype, itype; 4822 TCGReg oreg, ireg; 4823 4824 allocated_regs = s->reserved_regs; 4825 preferred_regs = output_pref(op, 0); 4826 ots = arg_temp(op->args[0]); 4827 ts = arg_temp(op->args[1]); 4828 4829 /* ENV should not be modified. */ 4830 tcg_debug_assert(!temp_readonly(ots)); 4831 4832 /* Note that otype != itype for no-op truncation. */ 4833 otype = ots->type; 4834 itype = ts->type; 4835 4836 if (ts->val_type == TEMP_VAL_CONST) { 4837 /* propagate constant or generate sti */ 4838 tcg_target_ulong val = ts->val; 4839 if (IS_DEAD_ARG(1)) { 4840 temp_dead(s, ts); 4841 } 4842 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4843 return; 4844 } 4845 4846 /* If the source value is in memory we're going to be forced 4847 to have it in a register in order to perform the copy. Copy 4848 the SOURCE value into its own register first, that way we 4849 don't have to reload SOURCE the next time it is used. */ 4850 if (ts->val_type == TEMP_VAL_MEM) { 4851 temp_load(s, ts, tcg_target_available_regs[itype], 4852 allocated_regs, preferred_regs); 4853 } 4854 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4855 ireg = ts->reg; 4856 4857 if (IS_DEAD_ARG(0)) { 4858 /* mov to a non-saved dead register makes no sense (even with 4859 liveness analysis disabled). */ 4860 tcg_debug_assert(NEED_SYNC_ARG(0)); 4861 if (!ots->mem_allocated) { 4862 temp_allocate_frame(s, ots); 4863 } 4864 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4865 if (IS_DEAD_ARG(1)) { 4866 temp_dead(s, ts); 4867 } 4868 temp_dead(s, ots); 4869 return; 4870 } 4871 4872 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4873 /* 4874 * The mov can be suppressed. Kill input first, so that it 4875 * is unlinked from reg_to_temp, then set the output to the 4876 * reg that we saved from the input. 4877 */ 4878 temp_dead(s, ts); 4879 oreg = ireg; 4880 } else { 4881 if (ots->val_type == TEMP_VAL_REG) { 4882 oreg = ots->reg; 4883 } else { 4884 /* Make sure to not spill the input register during allocation. */ 4885 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4886 allocated_regs | ((TCGRegSet)1 << ireg), 4887 preferred_regs, ots->indirect_base); 4888 } 4889 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4890 /* 4891 * Cross register class move not supported. 
4892 * Store the source register into the destination slot 4893 * and leave the destination temp as TEMP_VAL_MEM. 4894 */ 4895 assert(!temp_readonly(ots)); 4896 if (!ts->mem_allocated) { 4897 temp_allocate_frame(s, ots); 4898 } 4899 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4900 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4901 ots->mem_coherent = 1; 4902 return; 4903 } 4904 } 4905 set_temp_val_reg(s, ots, oreg); 4906 ots->mem_coherent = 0; 4907 4908 if (NEED_SYNC_ARG(0)) { 4909 temp_sync(s, ots, allocated_regs, 0, 0); 4910 } 4911 } 4912 4913 /* 4914 * Specialized code generation for INDEX_op_dup_vec. 4915 */ 4916 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4917 { 4918 const TCGLifeData arg_life = op->life; 4919 TCGRegSet dup_out_regs, dup_in_regs; 4920 const TCGArgConstraint *dup_args_ct; 4921 TCGTemp *its, *ots; 4922 TCGType itype, vtype; 4923 unsigned vece; 4924 int lowpart_ofs; 4925 bool ok; 4926 4927 ots = arg_temp(op->args[0]); 4928 its = arg_temp(op->args[1]); 4929 4930 /* ENV should not be modified. */ 4931 tcg_debug_assert(!temp_readonly(ots)); 4932 4933 itype = its->type; 4934 vece = TCGOP_VECE(op); 4935 vtype = TCGOP_TYPE(op); 4936 4937 if (its->val_type == TEMP_VAL_CONST) { 4938 /* Propagate constant via movi -> dupi. */ 4939 tcg_target_ulong val = its->val; 4940 if (IS_DEAD_ARG(1)) { 4941 temp_dead(s, its); 4942 } 4943 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4944 return; 4945 } 4946 4947 dup_args_ct = opcode_args_ct(op); 4948 dup_out_regs = dup_args_ct[0].regs; 4949 dup_in_regs = dup_args_ct[1].regs; 4950 4951 /* Allocate the output register now. */ 4952 if (ots->val_type != TEMP_VAL_REG) { 4953 TCGRegSet allocated_regs = s->reserved_regs; 4954 TCGReg oreg; 4955 4956 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4957 /* Make sure to not spill the input register. */ 4958 tcg_regset_set_reg(allocated_regs, its->reg); 4959 } 4960 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4961 output_pref(op, 0), ots->indirect_base); 4962 set_temp_val_reg(s, ots, oreg); 4963 } 4964 4965 switch (its->val_type) { 4966 case TEMP_VAL_REG: 4967 /* 4968 * The dup constriaints must be broad, covering all possible VECE. 4969 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 4970 * to fail, indicating that extra moves are required for that case. 4971 */ 4972 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4973 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4974 goto done; 4975 } 4976 /* Try again from memory or a vector input register. */ 4977 } 4978 if (!its->mem_coherent) { 4979 /* 4980 * The input register is not synced, and so an extra store 4981 * would be required to use memory. Attempt an integer-vector 4982 * register move first. We do not have a TCGRegSet for this. 4983 */ 4984 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4985 break; 4986 } 4987 /* Sync the temp back to its slot and load from there. */ 4988 temp_sync(s, its, s->reserved_regs, 0, 0); 4989 } 4990 /* fall through */ 4991 4992 case TEMP_VAL_MEM: 4993 lowpart_ofs = 0; 4994 if (HOST_BIG_ENDIAN) { 4995 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4996 } 4997 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4998 its->mem_offset + lowpart_ofs)) { 4999 goto done; 5000 } 5001 /* Load the input into the destination vector register. 
*/ 5002 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5003 break; 5004 5005 default: 5006 g_assert_not_reached(); 5007 } 5008 5009 /* We now have a vector input register, so dup must succeed. */ 5010 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5011 tcg_debug_assert(ok); 5012 5013 done: 5014 ots->mem_coherent = 0; 5015 if (IS_DEAD_ARG(1)) { 5016 temp_dead(s, its); 5017 } 5018 if (NEED_SYNC_ARG(0)) { 5019 temp_sync(s, ots, s->reserved_regs, 0, 0); 5020 } 5021 if (IS_DEAD_ARG(0)) { 5022 temp_dead(s, ots); 5023 } 5024 } 5025 5026 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5027 { 5028 const TCGLifeData arg_life = op->life; 5029 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5030 TCGRegSet i_allocated_regs; 5031 TCGRegSet o_allocated_regs; 5032 int i, k, nb_iargs, nb_oargs; 5033 TCGReg reg; 5034 TCGArg arg; 5035 const TCGArgConstraint *args_ct; 5036 const TCGArgConstraint *arg_ct; 5037 TCGTemp *ts; 5038 TCGArg new_args[TCG_MAX_OP_ARGS]; 5039 int const_args[TCG_MAX_OP_ARGS]; 5040 TCGCond op_cond; 5041 5042 nb_oargs = def->nb_oargs; 5043 nb_iargs = def->nb_iargs; 5044 5045 /* copy constants */ 5046 memcpy(new_args + nb_oargs + nb_iargs, 5047 op->args + nb_oargs + nb_iargs, 5048 sizeof(TCGArg) * def->nb_cargs); 5049 5050 i_allocated_regs = s->reserved_regs; 5051 o_allocated_regs = s->reserved_regs; 5052 5053 switch (op->opc) { 5054 case INDEX_op_brcond_i32: 5055 case INDEX_op_brcond_i64: 5056 op_cond = op->args[2]; 5057 break; 5058 case INDEX_op_setcond_i32: 5059 case INDEX_op_setcond_i64: 5060 case INDEX_op_negsetcond_i32: 5061 case INDEX_op_negsetcond_i64: 5062 case INDEX_op_cmp_vec: 5063 op_cond = op->args[3]; 5064 break; 5065 case INDEX_op_brcond2_i32: 5066 op_cond = op->args[4]; 5067 break; 5068 case INDEX_op_movcond_i32: 5069 case INDEX_op_movcond_i64: 5070 case INDEX_op_setcond2_i32: 5071 case INDEX_op_cmpsel_vec: 5072 op_cond = op->args[5]; 5073 break; 5074 default: 5075 /* No condition within opcode. */ 5076 op_cond = TCG_COND_ALWAYS; 5077 break; 5078 } 5079 5080 args_ct = opcode_args_ct(op); 5081 5082 /* satisfy input constraints */ 5083 for (k = 0; k < nb_iargs; k++) { 5084 TCGRegSet i_preferred_regs, i_required_regs; 5085 bool allocate_new_reg, copyto_new_reg; 5086 TCGTemp *ts2; 5087 int i1, i2; 5088 5089 i = args_ct[nb_oargs + k].sort_index; 5090 arg = op->args[i]; 5091 arg_ct = &args_ct[i]; 5092 ts = arg_temp(arg); 5093 5094 if (ts->val_type == TEMP_VAL_CONST 5095 && tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5096 op_cond, TCGOP_VECE(op))) { 5097 /* constant is OK for instruction */ 5098 const_args[i] = 1; 5099 new_args[i] = ts->val; 5100 continue; 5101 } 5102 5103 reg = ts->reg; 5104 i_preferred_regs = 0; 5105 i_required_regs = arg_ct->regs; 5106 allocate_new_reg = false; 5107 copyto_new_reg = false; 5108 5109 switch (arg_ct->pair) { 5110 case 0: /* not paired */ 5111 if (arg_ct->ialias) { 5112 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5113 5114 /* 5115 * If the input is readonly, then it cannot also be an 5116 * output and aliased to itself. If the input is not 5117 * dead after the instruction, we must allocate a new 5118 * register and move it. 5119 */ 5120 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5121 || args_ct[arg_ct->alias_index].newreg) { 5122 allocate_new_reg = true; 5123 } else if (ts->val_type == TEMP_VAL_REG) { 5124 /* 5125 * Check if the current register has already been 5126 * allocated for another input. 
5127 */ 5128 allocate_new_reg = 5129 tcg_regset_test_reg(i_allocated_regs, reg); 5130 } 5131 } 5132 if (!allocate_new_reg) { 5133 temp_load(s, ts, i_required_regs, i_allocated_regs, 5134 i_preferred_regs); 5135 reg = ts->reg; 5136 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5137 } 5138 if (allocate_new_reg) { 5139 /* 5140 * Allocate a new register matching the constraint 5141 * and move the temporary register into it. 5142 */ 5143 temp_load(s, ts, tcg_target_available_regs[ts->type], 5144 i_allocated_regs, 0); 5145 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5146 i_preferred_regs, ts->indirect_base); 5147 copyto_new_reg = true; 5148 } 5149 break; 5150 5151 case 1: 5152 /* First of an input pair; if i1 == i2, the second is an output. */ 5153 i1 = i; 5154 i2 = arg_ct->pair_index; 5155 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5156 5157 /* 5158 * It is easier to default to allocating a new pair 5159 * and to identify a few cases where it's not required. 5160 */ 5161 if (arg_ct->ialias) { 5162 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5163 if (IS_DEAD_ARG(i1) && 5164 IS_DEAD_ARG(i2) && 5165 !temp_readonly(ts) && 5166 ts->val_type == TEMP_VAL_REG && 5167 ts->reg < TCG_TARGET_NB_REGS - 1 && 5168 tcg_regset_test_reg(i_required_regs, reg) && 5169 !tcg_regset_test_reg(i_allocated_regs, reg) && 5170 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5171 (ts2 5172 ? ts2->val_type == TEMP_VAL_REG && 5173 ts2->reg == reg + 1 && 5174 !temp_readonly(ts2) 5175 : s->reg_to_temp[reg + 1] == NULL)) { 5176 break; 5177 } 5178 } else { 5179 /* Without aliasing, the pair must also be an input. */ 5180 tcg_debug_assert(ts2); 5181 if (ts->val_type == TEMP_VAL_REG && 5182 ts2->val_type == TEMP_VAL_REG && 5183 ts2->reg == reg + 1 && 5184 tcg_regset_test_reg(i_required_regs, reg)) { 5185 break; 5186 } 5187 } 5188 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5189 0, ts->indirect_base); 5190 goto do_pair; 5191 5192 case 2: /* pair second */ 5193 reg = new_args[arg_ct->pair_index] + 1; 5194 goto do_pair; 5195 5196 case 3: /* ialias with second output, no first input */ 5197 tcg_debug_assert(arg_ct->ialias); 5198 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5199 5200 if (IS_DEAD_ARG(i) && 5201 !temp_readonly(ts) && 5202 ts->val_type == TEMP_VAL_REG && 5203 reg > 0 && 5204 s->reg_to_temp[reg - 1] == NULL && 5205 tcg_regset_test_reg(i_required_regs, reg) && 5206 !tcg_regset_test_reg(i_allocated_regs, reg) && 5207 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5208 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5209 break; 5210 } 5211 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5212 i_allocated_regs, 0, 5213 ts->indirect_base); 5214 tcg_regset_set_reg(i_allocated_regs, reg); 5215 reg += 1; 5216 goto do_pair; 5217 5218 do_pair: 5219 /* 5220 * If an aliased input is not dead after the instruction, 5221 * we must allocate a new register and move it. 5222 */ 5223 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5224 TCGRegSet t_allocated_regs = i_allocated_regs; 5225 5226 /* 5227 * Because of the alias, and the continued life, make sure 5228 * that the temp is somewhere *other* than the reg pair, 5229 * and we get a copy in reg. 5230 */ 5231 tcg_regset_set_reg(t_allocated_regs, reg); 5232 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5233 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5234 /* If ts was already in reg, copy it somewhere else. 
*/ 5235 TCGReg nr; 5236 bool ok; 5237 5238 tcg_debug_assert(ts->kind != TEMP_FIXED); 5239 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5240 t_allocated_regs, 0, ts->indirect_base); 5241 ok = tcg_out_mov(s, ts->type, nr, reg); 5242 tcg_debug_assert(ok); 5243 5244 set_temp_val_reg(s, ts, nr); 5245 } else { 5246 temp_load(s, ts, tcg_target_available_regs[ts->type], 5247 t_allocated_regs, 0); 5248 copyto_new_reg = true; 5249 } 5250 } else { 5251 /* Preferably allocate to reg, otherwise copy. */ 5252 i_required_regs = (TCGRegSet)1 << reg; 5253 temp_load(s, ts, i_required_regs, i_allocated_regs, 5254 i_preferred_regs); 5255 copyto_new_reg = ts->reg != reg; 5256 } 5257 break; 5258 5259 default: 5260 g_assert_not_reached(); 5261 } 5262 5263 if (copyto_new_reg) { 5264 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5265 /* 5266 * Cross register class move not supported. Sync the 5267 * temp back to its slot and load from there. 5268 */ 5269 temp_sync(s, ts, i_allocated_regs, 0, 0); 5270 tcg_out_ld(s, ts->type, reg, 5271 ts->mem_base->reg, ts->mem_offset); 5272 } 5273 } 5274 new_args[i] = reg; 5275 const_args[i] = 0; 5276 tcg_regset_set_reg(i_allocated_regs, reg); 5277 } 5278 5279 /* mark dead temporaries and free the associated registers */ 5280 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5281 if (IS_DEAD_ARG(i)) { 5282 temp_dead(s, arg_temp(op->args[i])); 5283 } 5284 } 5285 5286 if (def->flags & TCG_OPF_COND_BRANCH) { 5287 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5288 } else if (def->flags & TCG_OPF_BB_END) { 5289 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5290 } else { 5291 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5292 /* XXX: permit generic clobber register list ? */ 5293 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5294 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5295 tcg_reg_free(s, i, i_allocated_regs); 5296 } 5297 } 5298 } 5299 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5300 /* sync globals if the op has side effects and might trigger 5301 an exception. */ 5302 sync_globals(s, i_allocated_regs); 5303 } 5304 5305 /* satisfy the output constraints */ 5306 for (k = 0; k < nb_oargs; k++) { 5307 i = args_ct[k].sort_index; 5308 arg = op->args[i]; 5309 arg_ct = &args_ct[i]; 5310 ts = arg_temp(arg); 5311 5312 /* ENV should not be modified. 
*/ 5313 tcg_debug_assert(!temp_readonly(ts)); 5314 5315 switch (arg_ct->pair) { 5316 case 0: /* not paired */ 5317 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5318 reg = new_args[arg_ct->alias_index]; 5319 } else if (arg_ct->newreg) { 5320 reg = tcg_reg_alloc(s, arg_ct->regs, 5321 i_allocated_regs | o_allocated_regs, 5322 output_pref(op, k), ts->indirect_base); 5323 } else { 5324 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5325 output_pref(op, k), ts->indirect_base); 5326 } 5327 break; 5328 5329 case 1: /* first of pair */ 5330 if (arg_ct->oalias) { 5331 reg = new_args[arg_ct->alias_index]; 5332 } else if (arg_ct->newreg) { 5333 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5334 i_allocated_regs | o_allocated_regs, 5335 output_pref(op, k), 5336 ts->indirect_base); 5337 } else { 5338 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5339 output_pref(op, k), 5340 ts->indirect_base); 5341 } 5342 break; 5343 5344 case 2: /* second of pair */ 5345 if (arg_ct->oalias) { 5346 reg = new_args[arg_ct->alias_index]; 5347 } else { 5348 reg = new_args[arg_ct->pair_index] + 1; 5349 } 5350 break; 5351 5352 case 3: /* first of pair, aliasing with a second input */ 5353 tcg_debug_assert(!arg_ct->newreg); 5354 reg = new_args[arg_ct->pair_index] - 1; 5355 break; 5356 5357 default: 5358 g_assert_not_reached(); 5359 } 5360 tcg_regset_set_reg(o_allocated_regs, reg); 5361 set_temp_val_reg(s, ts, reg); 5362 ts->mem_coherent = 0; 5363 new_args[i] = reg; 5364 } 5365 } 5366 5367 /* emit instruction */ 5368 switch (op->opc) { 5369 case INDEX_op_ext8s_i32: 5370 tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 5371 break; 5372 case INDEX_op_ext8s_i64: 5373 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 5374 break; 5375 case INDEX_op_ext8u_i32: 5376 case INDEX_op_ext8u_i64: 5377 tcg_out_ext8u(s, new_args[0], new_args[1]); 5378 break; 5379 case INDEX_op_ext16s_i32: 5380 tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 5381 break; 5382 case INDEX_op_ext16s_i64: 5383 tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 5384 break; 5385 case INDEX_op_ext16u_i32: 5386 case INDEX_op_ext16u_i64: 5387 tcg_out_ext16u(s, new_args[0], new_args[1]); 5388 break; 5389 case INDEX_op_ext32s_i64: 5390 tcg_out_ext32s(s, new_args[0], new_args[1]); 5391 break; 5392 case INDEX_op_ext32u_i64: 5393 tcg_out_ext32u(s, new_args[0], new_args[1]); 5394 break; 5395 case INDEX_op_ext_i32_i64: 5396 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5397 break; 5398 case INDEX_op_extu_i32_i64: 5399 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5400 break; 5401 case INDEX_op_extrl_i64_i32: 5402 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5403 break; 5404 default: 5405 if (def->flags & TCG_OPF_VECTOR) { 5406 tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64, 5407 TCGOP_VECE(op), new_args, const_args); 5408 } else { 5409 tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args); 5410 } 5411 break; 5412 } 5413 5414 /* move the outputs in the correct register if needed */ 5415 for(i = 0; i < nb_oargs; i++) { 5416 ts = arg_temp(op->args[i]); 5417 5418 /* ENV should not be modified. 
*/ 5419 tcg_debug_assert(!temp_readonly(ts)); 5420 5421 if (NEED_SYNC_ARG(i)) { 5422 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5423 } else if (IS_DEAD_ARG(i)) { 5424 temp_dead(s, ts); 5425 } 5426 } 5427 } 5428 5429 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5430 { 5431 const TCGLifeData arg_life = op->life; 5432 TCGTemp *ots, *itsl, *itsh; 5433 TCGType vtype = TCGOP_TYPE(op); 5434 5435 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5436 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5437 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5438 5439 ots = arg_temp(op->args[0]); 5440 itsl = arg_temp(op->args[1]); 5441 itsh = arg_temp(op->args[2]); 5442 5443 /* ENV should not be modified. */ 5444 tcg_debug_assert(!temp_readonly(ots)); 5445 5446 /* Allocate the output register now. */ 5447 if (ots->val_type != TEMP_VAL_REG) { 5448 TCGRegSet allocated_regs = s->reserved_regs; 5449 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5450 TCGReg oreg; 5451 5452 /* Make sure to not spill the input registers. */ 5453 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5454 tcg_regset_set_reg(allocated_regs, itsl->reg); 5455 } 5456 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5457 tcg_regset_set_reg(allocated_regs, itsh->reg); 5458 } 5459 5460 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5461 output_pref(op, 0), ots->indirect_base); 5462 set_temp_val_reg(s, ots, oreg); 5463 } 5464 5465 /* Promote dup2 of immediates to dupi_vec. */ 5466 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5467 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5468 MemOp vece = MO_64; 5469 5470 if (val == dup_const(MO_8, val)) { 5471 vece = MO_8; 5472 } else if (val == dup_const(MO_16, val)) { 5473 vece = MO_16; 5474 } else if (val == dup_const(MO_32, val)) { 5475 vece = MO_32; 5476 } 5477 5478 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5479 goto done; 5480 } 5481 5482 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5483 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5484 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5485 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5486 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5487 5488 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5489 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5490 5491 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5492 its->mem_base->reg, its->mem_offset)) { 5493 goto done; 5494 } 5495 } 5496 5497 /* Fall back to generic expansion. */ 5498 return false; 5499 5500 done: 5501 ots->mem_coherent = 0; 5502 if (IS_DEAD_ARG(1)) { 5503 temp_dead(s, itsl); 5504 } 5505 if (IS_DEAD_ARG(2)) { 5506 temp_dead(s, itsh); 5507 } 5508 if (NEED_SYNC_ARG(0)) { 5509 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5510 } else if (IS_DEAD_ARG(0)) { 5511 temp_dead(s, ots); 5512 } 5513 return true; 5514 } 5515 5516 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5517 TCGRegSet allocated_regs) 5518 { 5519 if (ts->val_type == TEMP_VAL_REG) { 5520 if (ts->reg != reg) { 5521 tcg_reg_free(s, reg, allocated_regs); 5522 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5523 /* 5524 * Cross register class move not supported. Sync the 5525 * temp back to its slot and load from there. 
5526 */ 5527 temp_sync(s, ts, allocated_regs, 0, 0); 5528 tcg_out_ld(s, ts->type, reg, 5529 ts->mem_base->reg, ts->mem_offset); 5530 } 5531 } 5532 } else { 5533 TCGRegSet arg_set = 0; 5534 5535 tcg_reg_free(s, reg, allocated_regs); 5536 tcg_regset_set_reg(arg_set, reg); 5537 temp_load(s, ts, arg_set, allocated_regs, 0); 5538 } 5539 } 5540 5541 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5542 TCGRegSet allocated_regs) 5543 { 5544 /* 5545 * When the destination is on the stack, load up the temp and store. 5546 * If there are many call-saved registers, the temp might live to 5547 * see another use; otherwise it'll be discarded. 5548 */ 5549 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5550 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5551 arg_slot_stk_ofs(arg_slot)); 5552 } 5553 5554 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5555 TCGTemp *ts, TCGRegSet *allocated_regs) 5556 { 5557 if (arg_slot_reg_p(l->arg_slot)) { 5558 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5559 load_arg_reg(s, reg, ts, *allocated_regs); 5560 tcg_regset_set_reg(*allocated_regs, reg); 5561 } else { 5562 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5563 } 5564 } 5565 5566 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5567 intptr_t ref_off, TCGRegSet *allocated_regs) 5568 { 5569 TCGReg reg; 5570 5571 if (arg_slot_reg_p(arg_slot)) { 5572 reg = tcg_target_call_iarg_regs[arg_slot]; 5573 tcg_reg_free(s, reg, *allocated_regs); 5574 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5575 tcg_regset_set_reg(*allocated_regs, reg); 5576 } else { 5577 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5578 *allocated_regs, 0, false); 5579 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5580 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5581 arg_slot_stk_ofs(arg_slot)); 5582 } 5583 } 5584 5585 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5586 { 5587 const int nb_oargs = TCGOP_CALLO(op); 5588 const int nb_iargs = TCGOP_CALLI(op); 5589 const TCGLifeData arg_life = op->life; 5590 const TCGHelperInfo *info = tcg_call_info(op); 5591 TCGRegSet allocated_regs = s->reserved_regs; 5592 int i; 5593 5594 /* 5595 * Move inputs into place in reverse order, 5596 * so that we place stacked arguments first. 5597 */ 5598 for (i = nb_iargs - 1; i >= 0; --i) { 5599 const TCGCallArgumentLoc *loc = &info->in[i]; 5600 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5601 5602 switch (loc->kind) { 5603 case TCG_CALL_ARG_NORMAL: 5604 case TCG_CALL_ARG_EXTEND_U: 5605 case TCG_CALL_ARG_EXTEND_S: 5606 load_arg_normal(s, loc, ts, &allocated_regs); 5607 break; 5608 case TCG_CALL_ARG_BY_REF: 5609 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5610 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5611 arg_slot_stk_ofs(loc->ref_slot), 5612 &allocated_regs); 5613 break; 5614 case TCG_CALL_ARG_BY_REF_N: 5615 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5616 break; 5617 default: 5618 g_assert_not_reached(); 5619 } 5620 } 5621 5622 /* Mark dead temporaries and free the associated registers. */ 5623 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5624 if (IS_DEAD_ARG(i)) { 5625 temp_dead(s, arg_temp(op->args[i])); 5626 } 5627 } 5628 5629 /* Clobber call registers. 
*/ 5630 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5631 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5632 tcg_reg_free(s, i, allocated_regs); 5633 } 5634 } 5635 5636 /* 5637 * Save globals if they might be written by the helper, 5638 * sync them if they might be read. 5639 */ 5640 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5641 /* Nothing to do */ 5642 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5643 sync_globals(s, allocated_regs); 5644 } else { 5645 save_globals(s, allocated_regs); 5646 } 5647 5648 /* 5649 * If the ABI passes a pointer to the returned struct as the first 5650 * argument, load that now. Pass a pointer to the output home slot. 5651 */ 5652 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5653 TCGTemp *ts = arg_temp(op->args[0]); 5654 5655 if (!ts->mem_allocated) { 5656 temp_allocate_frame(s, ts); 5657 } 5658 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5659 } 5660 5661 tcg_out_call(s, tcg_call_func(op), info); 5662 5663 /* Assign output registers and emit moves if needed. */ 5664 switch (info->out_kind) { 5665 case TCG_CALL_RET_NORMAL: 5666 for (i = 0; i < nb_oargs; i++) { 5667 TCGTemp *ts = arg_temp(op->args[i]); 5668 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5669 5670 /* ENV should not be modified. */ 5671 tcg_debug_assert(!temp_readonly(ts)); 5672 5673 set_temp_val_reg(s, ts, reg); 5674 ts->mem_coherent = 0; 5675 } 5676 break; 5677 5678 case TCG_CALL_RET_BY_VEC: 5679 { 5680 TCGTemp *ts = arg_temp(op->args[0]); 5681 5682 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5683 tcg_debug_assert(ts->temp_subindex == 0); 5684 if (!ts->mem_allocated) { 5685 temp_allocate_frame(s, ts); 5686 } 5687 tcg_out_st(s, TCG_TYPE_V128, 5688 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5689 ts->mem_base->reg, ts->mem_offset); 5690 } 5691 /* fall through to mark all parts in memory */ 5692 5693 case TCG_CALL_RET_BY_REF: 5694 /* The callee has performed a write through the reference. */ 5695 for (i = 0; i < nb_oargs; i++) { 5696 TCGTemp *ts = arg_temp(op->args[i]); 5697 ts->val_type = TEMP_VAL_MEM; 5698 } 5699 break; 5700 5701 default: 5702 g_assert_not_reached(); 5703 } 5704 5705 /* Flush or discard output registers as needed. */ 5706 for (i = 0; i < nb_oargs; i++) { 5707 TCGTemp *ts = arg_temp(op->args[i]); 5708 if (NEED_SYNC_ARG(i)) { 5709 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5710 } else if (IS_DEAD_ARG(i)) { 5711 temp_dead(s, ts); 5712 } 5713 } 5714 } 5715 5716 /** 5717 * atom_and_align_for_opc: 5718 * @s: tcg context 5719 * @opc: memory operation code 5720 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5721 * @allow_two_ops: true if we are prepared to issue two operations 5722 * 5723 * Return the alignment and atomicity to use for the inline fast path 5724 * for the given memory operation. The alignment may be larger than 5725 * that specified in @opc, and the correct alignment will be diagnosed 5726 * by the slow path helper. 5727 * 5728 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5729 * and issue two loads or stores for subalignment. 5730 */ 5731 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5732 MemOp host_atom, bool allow_two_ops) 5733 { 5734 MemOp align = memop_alignment_bits(opc); 5735 MemOp size = opc & MO_SIZE; 5736 MemOp half = size ? size - 1 : 0; 5737 MemOp atom = opc & MO_ATOM_MASK; 5738 MemOp atmax; 5739 5740 switch (atom) { 5741 case MO_ATOM_NONE: 5742 /* The operation requires no specific atomicity. 
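   A byte access is atomic by definition, so MO_8 (no constraint) is reported.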
*/ 5743 atmax = MO_8; 5744 break; 5745 5746 case MO_ATOM_IFALIGN: 5747 atmax = size; 5748 break; 5749 5750 case MO_ATOM_IFALIGN_PAIR: 5751 atmax = half; 5752 break; 5753 5754 case MO_ATOM_WITHIN16: 5755 atmax = size; 5756 if (size == MO_128) { 5757 /* Misalignment implies !within16, and therefore no atomicity. */ 5758 } else if (host_atom != MO_ATOM_WITHIN16) { 5759 /* The host does not implement within16, so require alignment. */ 5760 align = MAX(align, size); 5761 } 5762 break; 5763 5764 case MO_ATOM_WITHIN16_PAIR: 5765 atmax = size; 5766 /* 5767 * Misalignment implies !within16, and therefore half atomicity. 5768 * Any host prepared for two operations can implement this with 5769 * half alignment. 5770 */ 5771 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5772 align = MAX(align, half); 5773 } 5774 break; 5775 5776 case MO_ATOM_SUBALIGN: 5777 atmax = size; 5778 if (host_atom != MO_ATOM_SUBALIGN) { 5779 /* If unaligned but not odd, there are subobjects up to half. */ 5780 if (allow_two_ops) { 5781 align = MAX(align, half); 5782 } else { 5783 align = MAX(align, size); 5784 } 5785 } 5786 break; 5787 5788 default: 5789 g_assert_not_reached(); 5790 } 5791 5792 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5793 } 5794 5795 /* 5796 * Similarly for qemu_ld/st slow path helpers. 5797 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5798 * using only the provided backend tcg_out_* functions. 5799 */ 5800 5801 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5802 { 5803 int ofs = arg_slot_stk_ofs(slot); 5804 5805 /* 5806 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5807 * require extension to uint64_t, adjust the address for uint32_t. 5808 */ 5809 if (HOST_BIG_ENDIAN && 5810 TCG_TARGET_REG_BITS == 64 && 5811 type == TCG_TYPE_I32) { 5812 ofs += 4; 5813 } 5814 return ofs; 5815 } 5816 5817 static void tcg_out_helper_load_slots(TCGContext *s, 5818 unsigned nmov, TCGMovExtend *mov, 5819 const TCGLdstHelperParam *parm) 5820 { 5821 unsigned i; 5822 TCGReg dst3; 5823 5824 /* 5825 * Start from the end, storing to the stack first. 5826 * This frees those registers, so we need not consider overlap. 5827 */ 5828 for (i = nmov; i-- > 0; ) { 5829 unsigned slot = mov[i].dst; 5830 5831 if (arg_slot_reg_p(slot)) { 5832 goto found_reg; 5833 } 5834 5835 TCGReg src = mov[i].src; 5836 TCGType dst_type = mov[i].dst_type; 5837 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5838 5839 /* The argument is going onto the stack; extend into scratch. */ 5840 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5841 tcg_debug_assert(parm->ntmp != 0); 5842 mov[i].dst = src = parm->tmp[0]; 5843 tcg_out_movext1(s, &mov[i]); 5844 } 5845 5846 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5847 tcg_out_helper_stk_ofs(dst_type, slot)); 5848 } 5849 return; 5850 5851 found_reg: 5852 /* 5853 * The remaining arguments are in registers. 5854 * Convert slot numbers to argument registers. 5855 */ 5856 nmov = i + 1; 5857 for (i = 0; i < nmov; ++i) { 5858 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5859 } 5860 5861 switch (nmov) { 5862 case 4: 5863 /* The backend must have provided enough temps for the worst case. */ 5864 tcg_debug_assert(parm->ntmp >= 2); 5865 5866 dst3 = mov[3].dst; 5867 for (unsigned j = 0; j < 3; ++j) { 5868 if (dst3 == mov[j].src) { 5869 /* 5870 * Conflict. Copy the source to a temporary, perform the 5871 * remaining moves, then the extension from our scratch 5872 * on the way out. 
5873 */ 5874 TCGReg scratch = parm->tmp[1]; 5875 5876 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5877 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5878 tcg_out_movext1_new_src(s, &mov[3], scratch); 5879 break; 5880 } 5881 } 5882 5883 /* No conflicts: perform this move and continue. */ 5884 tcg_out_movext1(s, &mov[3]); 5885 /* fall through */ 5886 5887 case 3: 5888 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5889 parm->ntmp ? parm->tmp[0] : -1); 5890 break; 5891 case 2: 5892 tcg_out_movext2(s, mov, mov + 1, 5893 parm->ntmp ? parm->tmp[0] : -1); 5894 break; 5895 case 1: 5896 tcg_out_movext1(s, mov); 5897 break; 5898 default: 5899 g_assert_not_reached(); 5900 } 5901 } 5902 5903 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5904 TCGType type, tcg_target_long imm, 5905 const TCGLdstHelperParam *parm) 5906 { 5907 if (arg_slot_reg_p(slot)) { 5908 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5909 } else { 5910 int ofs = tcg_out_helper_stk_ofs(type, slot); 5911 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5912 tcg_debug_assert(parm->ntmp != 0); 5913 tcg_out_movi(s, type, parm->tmp[0], imm); 5914 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5915 } 5916 } 5917 } 5918 5919 static void tcg_out_helper_load_common_args(TCGContext *s, 5920 const TCGLabelQemuLdst *ldst, 5921 const TCGLdstHelperParam *parm, 5922 const TCGHelperInfo *info, 5923 unsigned next_arg) 5924 { 5925 TCGMovExtend ptr_mov = { 5926 .dst_type = TCG_TYPE_PTR, 5927 .src_type = TCG_TYPE_PTR, 5928 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 5929 }; 5930 const TCGCallArgumentLoc *loc = &info->in[0]; 5931 TCGType type; 5932 unsigned slot; 5933 tcg_target_ulong imm; 5934 5935 /* 5936 * Handle env, which is always first. 5937 */ 5938 ptr_mov.dst = loc->arg_slot; 5939 ptr_mov.src = TCG_AREG0; 5940 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5941 5942 /* 5943 * Handle oi. 5944 */ 5945 imm = ldst->oi; 5946 loc = &info->in[next_arg]; 5947 type = TCG_TYPE_I32; 5948 switch (loc->kind) { 5949 case TCG_CALL_ARG_NORMAL: 5950 break; 5951 case TCG_CALL_ARG_EXTEND_U: 5952 case TCG_CALL_ARG_EXTEND_S: 5953 /* No extension required for MemOpIdx. */ 5954 tcg_debug_assert(imm <= INT32_MAX); 5955 type = TCG_TYPE_REG; 5956 break; 5957 default: 5958 g_assert_not_reached(); 5959 } 5960 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 5961 next_arg++; 5962 5963 /* 5964 * Handle ra. 5965 */ 5966 loc = &info->in[next_arg]; 5967 slot = loc->arg_slot; 5968 if (parm->ra_gen) { 5969 int arg_reg = -1; 5970 TCGReg ra_reg; 5971 5972 if (arg_slot_reg_p(slot)) { 5973 arg_reg = tcg_target_call_iarg_regs[slot]; 5974 } 5975 ra_reg = parm->ra_gen(s, ldst, arg_reg); 5976 5977 ptr_mov.dst = slot; 5978 ptr_mov.src = ra_reg; 5979 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5980 } else { 5981 imm = (uintptr_t)ldst->raddr; 5982 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 5983 } 5984 } 5985 5986 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 5987 const TCGCallArgumentLoc *loc, 5988 TCGType dst_type, TCGType src_type, 5989 TCGReg lo, TCGReg hi) 5990 { 5991 MemOp reg_mo; 5992 5993 if (dst_type <= TCG_TYPE_REG) { 5994 MemOp src_ext; 5995 5996 switch (loc->kind) { 5997 case TCG_CALL_ARG_NORMAL: 5998 src_ext = src_type == TCG_TYPE_I32 ? 
MO_32 : MO_64; 5999 break; 6000 case TCG_CALL_ARG_EXTEND_U: 6001 dst_type = TCG_TYPE_REG; 6002 src_ext = MO_UL; 6003 break; 6004 case TCG_CALL_ARG_EXTEND_S: 6005 dst_type = TCG_TYPE_REG; 6006 src_ext = MO_SL; 6007 break; 6008 default: 6009 g_assert_not_reached(); 6010 } 6011 6012 mov[0].dst = loc->arg_slot; 6013 mov[0].dst_type = dst_type; 6014 mov[0].src = lo; 6015 mov[0].src_type = src_type; 6016 mov[0].src_ext = src_ext; 6017 return 1; 6018 } 6019 6020 if (TCG_TARGET_REG_BITS == 32) { 6021 assert(dst_type == TCG_TYPE_I64); 6022 reg_mo = MO_32; 6023 } else { 6024 assert(dst_type == TCG_TYPE_I128); 6025 reg_mo = MO_64; 6026 } 6027 6028 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6029 mov[0].src = lo; 6030 mov[0].dst_type = TCG_TYPE_REG; 6031 mov[0].src_type = TCG_TYPE_REG; 6032 mov[0].src_ext = reg_mo; 6033 6034 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6035 mov[1].src = hi; 6036 mov[1].dst_type = TCG_TYPE_REG; 6037 mov[1].src_type = TCG_TYPE_REG; 6038 mov[1].src_ext = reg_mo; 6039 6040 return 2; 6041 } 6042 6043 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6044 const TCGLdstHelperParam *parm) 6045 { 6046 const TCGHelperInfo *info; 6047 const TCGCallArgumentLoc *loc; 6048 TCGMovExtend mov[2]; 6049 unsigned next_arg, nmov; 6050 MemOp mop = get_memop(ldst->oi); 6051 6052 switch (mop & MO_SIZE) { 6053 case MO_8: 6054 case MO_16: 6055 case MO_32: 6056 info = &info_helper_ld32_mmu; 6057 break; 6058 case MO_64: 6059 info = &info_helper_ld64_mmu; 6060 break; 6061 case MO_128: 6062 info = &info_helper_ld128_mmu; 6063 break; 6064 default: 6065 g_assert_not_reached(); 6066 } 6067 6068 /* Defer env argument. */ 6069 next_arg = 1; 6070 6071 loc = &info->in[next_arg]; 6072 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6073 /* 6074 * 32-bit host with 32-bit guest: zero-extend the guest address 6075 * to 64-bits for the helper by storing the low part, then 6076 * load a zero for the high part. 6077 */ 6078 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6079 TCG_TYPE_I32, TCG_TYPE_I32, 6080 ldst->addrlo_reg, -1); 6081 tcg_out_helper_load_slots(s, 1, mov, parm); 6082 6083 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6084 TCG_TYPE_I32, 0, parm); 6085 next_arg += 2; 6086 } else { 6087 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6088 ldst->addrlo_reg, ldst->addrhi_reg); 6089 tcg_out_helper_load_slots(s, nmov, mov, parm); 6090 next_arg += nmov; 6091 } 6092 6093 switch (info->out_kind) { 6094 case TCG_CALL_RET_NORMAL: 6095 case TCG_CALL_RET_BY_VEC: 6096 break; 6097 case TCG_CALL_RET_BY_REF: 6098 /* 6099 * The return reference is in the first argument slot. 6100 * We need memory in which to return: re-use the top of stack. 
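 * If argument slot 0 is in a register, add the stack offset into that register directly; otherwise compute the address into a scratch register and store it out to the stack.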
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            break;
        case TCG_CALL_RET_BY_VEC:
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}

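/*
 * Load the arguments for a store helper.  As with the load path above,
 * env is deferred to tcg_out_helper_load_common_args; the guest address
 * and the data value are assigned to argument slots first (an I128 data
 * value may instead be spilled to the stack and passed by reference),
 * and oi plus the return address are filled in last.
 */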
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

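/*
 * Translate the ops collected in @s into host code for @tb.  Returns the
 * size in bytes of the generated code on success.  A negative value means
 * code generation was abandoned, e.g. -1 when the code buffer high-water
 * mark was reached, or -2 when the TB became too large for the 16-bit
 * insn offsets or relocations could not be resolved.
 */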
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

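/*
 * A sketch of the backend half of the interface implemented below.  The
 * exact unwind contents are per-backend (see each tcg-target.c.inc), and
 * "DebugFrame" here stands for whatever struct the backend uses to lay
 * out its .debug_frame data; the shape is roughly:
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         static const DebugFrame debug_frame = { ... unwind info ... };
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */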
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif