/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.
 */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

/*
 * Select the ELF class and byte order matching the host, for the
 * in-memory ELF image handed to GDB's JIT interface (see the
 * tcg_register_jit_int declaration below).
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.
 */

/* DWARF Common Information Entry header, as emitted into .debug_frame. */
typedef struct {
    /* NOTE(review): pointer-size alignment on @len presumably keeps the
       following uintptr_t FDE fields naturally aligned -- confirm. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF Frame Description Entry header, referring back to the CIE. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* CIE plus FDE header, emitted back to back. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

/* Bookkeeping for one qemu_ld/qemu_st slow path, emitted at TB end. */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc.
 */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Stubs for hosts without vector support; these must never be reached. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* No vector support: report every vector op as not emittable. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
/* In system mode there is no guest_base; any use is a build-time error. */
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

/* Per-backend description of how to marshal qemu_ld/st helper arguments. */
typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

/* Slow-path load helpers, indexed by MemOp size and sign. */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

/* Slow-path store helpers, indexed by MemOp size. */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8] = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

/*
 * tcg_outN appends an N-bit value to the instruction stream;
 * tcg_patchN overwrites an N-bit value in already-emitted code.
 * Each variant exists only when N bits span whole tcg_insn_units.
 */
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Units smaller than the value: byte-copy and advance by count. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/* Record that @code_ptr must be patched once label @l is resolved. */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

/* Bind label @l to the current output position (read-execute view). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

/* Allocate a fresh, unbound label in the current context. */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

/* Patch every recorded relocation; false if any patch did not fit. */
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Negative offset of the fast TLB for mmu index @which, relative to the
   end of CPUNegativeOffsetState (i.e. relative to env). */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns.
*/ 430 static G_NORETURN 431 void tcg_raise_tb_overflow(TCGContext *s) 432 { 433 siglongjmp(s->jmp_trans, -2); 434 } 435 436 /* 437 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext. 438 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg. 439 * 440 * However, tcg_out_helper_load_slots reuses this field to hold an 441 * argument slot number (which may designate a argument register or an 442 * argument stack slot), converting to TCGReg once all arguments that 443 * are destined for the stack are processed. 444 */ 445 typedef struct TCGMovExtend { 446 unsigned dst; 447 TCGReg src; 448 TCGType dst_type; 449 TCGType src_type; 450 MemOp src_ext; 451 } TCGMovExtend; 452 453 /** 454 * tcg_out_movext -- move and extend 455 * @s: tcg context 456 * @dst_type: integral type for destination 457 * @dst: destination register 458 * @src_type: integral type for source 459 * @src_ext: extension to apply to source 460 * @src: source register 461 * 462 * Move or extend @src into @dst, depending on @src_ext and the types. 
463 */ 464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, 465 TCGType src_type, MemOp src_ext, TCGReg src) 466 { 467 switch (src_ext) { 468 case MO_UB: 469 tcg_out_ext8u(s, dst, src); 470 break; 471 case MO_SB: 472 tcg_out_ext8s(s, dst_type, dst, src); 473 break; 474 case MO_UW: 475 tcg_out_ext16u(s, dst, src); 476 break; 477 case MO_SW: 478 tcg_out_ext16s(s, dst_type, dst, src); 479 break; 480 case MO_UL: 481 case MO_SL: 482 if (dst_type == TCG_TYPE_I32) { 483 if (src_type == TCG_TYPE_I32) { 484 tcg_out_mov(s, TCG_TYPE_I32, dst, src); 485 } else { 486 tcg_out_extrl_i64_i32(s, dst, src); 487 } 488 } else if (src_type == TCG_TYPE_I32) { 489 if (src_ext & MO_SIGN) { 490 tcg_out_exts_i32_i64(s, dst, src); 491 } else { 492 tcg_out_extu_i32_i64(s, dst, src); 493 } 494 } else { 495 if (src_ext & MO_SIGN) { 496 tcg_out_ext32s(s, dst, src); 497 } else { 498 tcg_out_ext32u(s, dst, src); 499 } 500 } 501 break; 502 case MO_UQ: 503 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 504 if (dst_type == TCG_TYPE_I32) { 505 tcg_out_extrl_i64_i32(s, dst, src); 506 } else { 507 tcg_out_mov(s, TCG_TYPE_I64, dst, src); 508 } 509 break; 510 default: 511 g_assert_not_reached(); 512 } 513 } 514 515 /* Minor variations on a theme, using a structure. */ 516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, 517 TCGReg src) 518 { 519 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); 520 } 521 522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) 523 { 524 tcg_out_movext1_new_src(s, i, i->src); 525 } 526 527 /** 528 * tcg_out_movext2 -- move and extend two pair 529 * @s: tcg context 530 * @i1: first move description 531 * @i2: second move description 532 * @scratch: temporary register, or -1 for none 533 * 534 * As tcg_out_movext, for both @i1 and @i2, caring for overlap 535 * between the sources and destinations. 
536 */ 537 538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, 539 const TCGMovExtend *i2, int scratch) 540 { 541 TCGReg src1 = i1->src; 542 TCGReg src2 = i2->src; 543 544 if (i1->dst != src2) { 545 tcg_out_movext1(s, i1); 546 tcg_out_movext1(s, i2); 547 return; 548 } 549 if (i2->dst == src1) { 550 TCGType src1_type = i1->src_type; 551 TCGType src2_type = i2->src_type; 552 553 if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { 554 /* The data is now in the correct registers, now extend. */ 555 src1 = i2->src; 556 src2 = i1->src; 557 } else { 558 tcg_debug_assert(scratch >= 0); 559 tcg_out_mov(s, src1_type, scratch, src1); 560 src1 = scratch; 561 } 562 } 563 tcg_out_movext1_new_src(s, i2, src2); 564 tcg_out_movext1_new_src(s, i1, src1); 565 } 566 567 /** 568 * tcg_out_movext3 -- move and extend three pair 569 * @s: tcg context 570 * @i1: first move description 571 * @i2: second move description 572 * @i3: third move description 573 * @scratch: temporary register, or -1 for none 574 * 575 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap 576 * between the sources and destinations. 577 */ 578 579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, 580 const TCGMovExtend *i2, const TCGMovExtend *i3, 581 int scratch) 582 { 583 TCGReg src1 = i1->src; 584 TCGReg src2 = i2->src; 585 TCGReg src3 = i3->src; 586 587 if (i1->dst != src2 && i1->dst != src3) { 588 tcg_out_movext1(s, i1); 589 tcg_out_movext2(s, i2, i3, scratch); 590 return; 591 } 592 if (i2->dst != src1 && i2->dst != src3) { 593 tcg_out_movext1(s, i2); 594 tcg_out_movext2(s, i1, i3, scratch); 595 return; 596 } 597 if (i3->dst != src1 && i3->dst != src2) { 598 tcg_out_movext1(s, i3); 599 tcg_out_movext2(s, i1, i2, scratch); 600 return; 601 } 602 603 /* 604 * There is a cycle. Since there are only 3 nodes, the cycle is 605 * either "clockwise" or "anti-clockwise", and can be solved with 606 * a single scratch or two xchg. 
607 */ 608 if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) { 609 /* "Clockwise" */ 610 if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) { 611 tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3); 612 /* The data is now in the correct registers, now extend. */ 613 tcg_out_movext1_new_src(s, i1, i1->dst); 614 tcg_out_movext1_new_src(s, i2, i2->dst); 615 tcg_out_movext1_new_src(s, i3, i3->dst); 616 } else { 617 tcg_debug_assert(scratch >= 0); 618 tcg_out_mov(s, i1->src_type, scratch, src1); 619 tcg_out_movext1(s, i3); 620 tcg_out_movext1(s, i2); 621 tcg_out_movext1_new_src(s, i1, scratch); 622 } 623 } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) { 624 /* "Anti-clockwise" */ 625 if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) { 626 tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2); 627 /* The data is now in the correct registers, now extend. */ 628 tcg_out_movext1_new_src(s, i1, i1->dst); 629 tcg_out_movext1_new_src(s, i2, i2->dst); 630 tcg_out_movext1_new_src(s, i3, i3->dst); 631 } else { 632 tcg_debug_assert(scratch >= 0); 633 tcg_out_mov(s, i1->src_type, scratch, src1); 634 tcg_out_movext1(s, i2); 635 tcg_out_movext1(s, i3); 636 tcg_out_movext1_new_src(s, i1, scratch); 637 } 638 } else { 639 g_assert_not_reached(); 640 } 641 } 642 643 /* 644 * Allocate a new TCGLabelQemuLdst entry. 645 */ 646 647 __attribute__((unused)) 648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) 649 { 650 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); 651 652 memset(l, 0, sizeof(*l)); 653 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); 654 655 return l; 656 } 657 658 /* 659 * Allocate new constant pool entries. 
660 */ 661 662 typedef struct TCGLabelPoolData { 663 struct TCGLabelPoolData *next; 664 tcg_insn_unit *label; 665 intptr_t addend; 666 int rtype; 667 unsigned nlong; 668 tcg_target_ulong data[]; 669 } TCGLabelPoolData; 670 671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, 672 tcg_insn_unit *label, intptr_t addend) 673 { 674 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) 675 + sizeof(tcg_target_ulong) * nlong); 676 677 n->label = label; 678 n->addend = addend; 679 n->rtype = rtype; 680 n->nlong = nlong; 681 return n; 682 } 683 684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) 685 { 686 TCGLabelPoolData *i, **pp; 687 int nlong = n->nlong; 688 689 /* Insertion sort on the pool. */ 690 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { 691 if (nlong > i->nlong) { 692 break; 693 } 694 if (nlong < i->nlong) { 695 continue; 696 } 697 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { 698 break; 699 } 700 } 701 n->next = *pp; 702 *pp = n; 703 } 704 705 /* The "usual" for generic integer code. */ 706 __attribute__((unused)) 707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, 708 tcg_insn_unit *label, intptr_t addend) 709 { 710 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); 711 n->data[0] = d; 712 new_pool_insert(s, n); 713 } 714 715 /* For v64 or v128, depending on the host. */ 716 __attribute__((unused)) 717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, 718 intptr_t addend, tcg_target_ulong d0, 719 tcg_target_ulong d1) 720 { 721 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); 722 n->data[0] = d0; 723 n->data[1] = d1; 724 new_pool_insert(s, n); 725 } 726 727 /* For v128 or v256, depending on the host. 
*/ 728 __attribute__((unused)) 729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, 730 intptr_t addend, tcg_target_ulong d0, 731 tcg_target_ulong d1, tcg_target_ulong d2, 732 tcg_target_ulong d3) 733 { 734 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); 735 n->data[0] = d0; 736 n->data[1] = d1; 737 n->data[2] = d2; 738 n->data[3] = d3; 739 new_pool_insert(s, n); 740 } 741 742 /* For v256, for 32-bit host. */ 743 __attribute__((unused)) 744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, 745 intptr_t addend, tcg_target_ulong d0, 746 tcg_target_ulong d1, tcg_target_ulong d2, 747 tcg_target_ulong d3, tcg_target_ulong d4, 748 tcg_target_ulong d5, tcg_target_ulong d6, 749 tcg_target_ulong d7) 750 { 751 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); 752 n->data[0] = d0; 753 n->data[1] = d1; 754 n->data[2] = d2; 755 n->data[3] = d3; 756 n->data[4] = d4; 757 n->data[5] = d5; 758 n->data[6] = d6; 759 n->data[7] = d7; 760 new_pool_insert(s, n); 761 } 762 763 /* 764 * Generate TB finalization at the end of block 765 */ 766 767 static int tcg_out_ldst_finalize(TCGContext *s) 768 { 769 TCGLabelQemuLdst *lb; 770 771 /* qemu_ld/st slow paths */ 772 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 773 if (lb->is_ld 774 ? !tcg_out_qemu_ld_slow_path(s, lb) 775 : !tcg_out_qemu_st_slow_path(s, lb)) { 776 return -2; 777 } 778 779 /* 780 * Test for (pending) buffer overflow. The assumption is that any 781 * one operation beginning below the high water mark cannot overrun 782 * the buffer completely. Thus we can test for overflow after 783 * generating code without having to check during generation. 
784 */ 785 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 786 return -1; 787 } 788 } 789 return 0; 790 } 791 792 static int tcg_out_pool_finalize(TCGContext *s) 793 { 794 TCGLabelPoolData *p = s->pool_labels; 795 TCGLabelPoolData *l = NULL; 796 void *a; 797 798 if (p == NULL) { 799 return 0; 800 } 801 802 /* 803 * ??? Round up to qemu_icache_linesize, but then do not round 804 * again when allocating the next TranslationBlock structure. 805 */ 806 a = (void *)ROUND_UP((uintptr_t)s->code_ptr, 807 sizeof(tcg_target_ulong) * p->nlong); 808 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); 809 s->data_gen_ptr = a; 810 811 for (; p != NULL; p = p->next) { 812 size_t size = sizeof(tcg_target_ulong) * p->nlong; 813 uintptr_t value; 814 815 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 816 if (unlikely(a > s->code_gen_highwater)) { 817 return -1; 818 } 819 memcpy(a, p->data, size); 820 a += size; 821 l = p; 822 } 823 824 value = (uintptr_t)tcg_splitwx_to_rx(a) - size; 825 if (!patch_reloc(p->label, p->rtype, value, p->addend)) { 826 return -2; 827 } 828 } 829 830 s->code_ptr = a; 831 return 0; 832 } 833 834 #define C_PFX1(P, A) P##A 835 #define C_PFX2(P, A, B) P##A##_##B 836 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 837 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 838 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 839 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 840 841 /* Define an enumeration for the various combinations. 
 */

/*
 * First expansion of tcg-target-con-set.h: each C_* macro expands to an
 * enumerator naming that constraint set.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum.
 */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

/*
 * Second expansion of tcg-target-con-set.h: each C_* macro expands to a
 * TCGConstraintSet initializer with the arguments stringified; the N_*
 * forms prepend "&" to mark newly-allocated (early-clobber) outputs.
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def().
 */

/* Third expansion: same identifiers as the enum, without trailing commas,
   for use in expression position by the backends. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
975 */ 976 typedef struct TCGOutOp { 977 TCGConstraintSetIndex static_constraint; 978 TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); 979 } TCGOutOp; 980 981 typedef struct TCGOutOpBinary { 982 TCGOutOp base; 983 void (*out_rrr)(TCGContext *s, TCGType type, 984 TCGReg a0, TCGReg a1, TCGReg a2); 985 void (*out_rri)(TCGContext *s, TCGType type, 986 TCGReg a0, TCGReg a1, tcg_target_long a2); 987 } TCGOutOpBinary; 988 989 typedef struct TCGOutOpUnary { 990 TCGOutOp base; 991 void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); 992 } TCGOutOpUnary; 993 994 typedef struct TCGOutOpSubtract { 995 TCGOutOp base; 996 void (*out_rrr)(TCGContext *s, TCGType type, 997 TCGReg a0, TCGReg a1, TCGReg a2); 998 void (*out_rir)(TCGContext *s, TCGType type, 999 TCGReg a0, tcg_target_long a1, TCGReg a2); 1000 } TCGOutOpSubtract; 1001 1002 #include "tcg-target.c.inc" 1003 1004 #ifndef CONFIG_TCG_INTERPRETER 1005 /* Validate CPUTLBDescFast placement. */ 1006 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - 1007 sizeof(CPUNegativeOffsetState)) 1008 < MIN_TLB_MASK_TABLE_OFS); 1009 #endif 1010 1011 /* 1012 * Register V as the TCGOutOp for O. 1013 * This verifies that V is of type T, otherwise give a nice compiler error. 1014 * This prevents trivial mistakes within each arch/tcg-target.c.inc. 1015 */ 1016 #define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) 1017 1018 /* Register allocation descriptions for every TCGOpcode. 
 */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    /* Each translation thread gets a private copy of the initial context. */
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /*
     * Relink mem_base: the copied temps still point into
     * tcg_init_ctx.temps; rebase each pointer onto this context's array.
     */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */

/*
 * Allocate SIZE bytes from the context's translation-lifetime pool.
 * Oversized requests get a dedicated chunk on the pool_first_large list;
 * otherwise allocation advances through fixed-size TCG_POOL_CHUNK_SIZE
 * chunks, appending a new chunk when the current one is exhausted.
 * Memory is released en masse by tcg_pool_reset().
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;  /* no chunk allocated yet at all */
        } else {
            if (!p->next) {
            new_pool:  /* note: label reached both by fallthrough and goto */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

/*
 * Release all large chunks and rewind the fixed-size chunk chain
 * (chunks themselves are kept for reuse; only large allocs are freed).
 */
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl dh_typecode_i32
#else
# define dh_typecode_ttl dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
/* Map a 3-bit dh_typecode value to the corresponding libffi type. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

/*
 * Build (once per helper) the ffi_cif describing a helper's signature
 * from its packed typemask.  The cif and its argument-type array are
 * allocated together and live forever.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);  /* 3 bits of typemask per argument */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

/* True if ARG_SLOT is passed in a register rather than on the stack. */
static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

/* Byte offset from the stack pointer area for a stack-passed ARG_SLOT. */
static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

/* Running totals while laying out one helper's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

/* Round the next argument slot up to an even index (64-bit pair ABIs). */
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

/* Assign one single-slot argument of the given KIND. */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

/* Assign N consecutive slots for one multi-word argument. */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

/* Lay out a 128-bit argument passed by reference to a stack copy. */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

/*
 * Compute the complete host calling-convention layout (return value and
 * every argument) for one helper, from its packed typemask, according to
 * the backend's TCG_TARGET_CALL_{RET,ARG}_* conventions.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Relies on EXTEND_U/EXTEND_S being adjacent enumerators. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

/*
 * One-time initialization of the parent context: helper layouts, backend
 * init, register allocation orders, the tcg_ctxs[] bookkeeping, and the
 * "env" global.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

/* Public entry point: initialize the TCG context and code regions. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region exhausted: try to get a fresh region, then retry. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    return tb;
}

/*
 * Emit the host prologue/epilogue at the start of the code buffer,
 * finalize its constant pool, flush the icache, optionally log a
 * disassembly, and record the prologue region.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution enters through the (possibly split-wx) executable alias. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble code, then dump the out-of-line data pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

/* Reset per-TB state in preparation for translating a new block. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}

/* Allocate the next TCGTemp slot, zero-initialized. */
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/*
 * Allocate a new global temp.  Only valid before any non-global temps
 * have been created (globals must be a prefix of the temps array).
 */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

/* Create a global living permanently in host register REG (e.g. env). */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    /* The fixed register is never available to the allocator. */
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

/* Declare the stack frame used for spills, based at register REG. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

/*
 * Create a global temp backed by memory at BASE + OFFSET.  A 64-bit
 * global on a 32-bit host is represented by two consecutive 32-bit
 * temps named "<name>_0"/"<name>_1".
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
    return temp_tcgv_i32(ts);
}

TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
    return temp_tcgv_i64(ts);
}

TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
    return temp_tcgv_ptr(ts);
}

/*
 * Allocate a temporary of the given TYPE and lifetime KIND.
 * TEMP_EBB temps are recycled from the per-type free list when possible;
 * multi-word types get consecutive TCGTemp slots with increasing
 * temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_i32 tcg_temp_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
}

TCGv_i32 tcg_temp_ebb_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
}

TCGv_i64 tcg_temp_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
}

TCGv_i64 tcg_temp_ebb_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
}

TCGv_ptr tcg_temp_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
}

TCGv_ptr tcg_temp_ebb_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
}

TCGv_i128 tcg_temp_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
}

TCGv_i128 tcg_temp_ebb_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/*
 * Free a temporary.  Only TEMP_EBB temps are actually recycled (onto the
 * per-type free list); TEMP_CONST and TEMP_TB frees are ignored.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(tcgv_i32_temp(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(tcgv_i64_temp(arg));
}

void tcg_temp_free_i128(TCGv_i128 arg)
{
    tcg_temp_free_internal(tcgv_i128_temp(arg));
}

void tcg_temp_free_ptr(TCGv_ptr arg)
{
    tcg_temp_free_internal(tcgv_ptr_temp(arg));
}

void tcg_temp_free_vec(TCGv_vec arg)
{
    tcg_temp_free_internal(tcgv_vec_temp(arg));
}

/*
 * Return the interned TEMP_CONST temp for VAL of the given TYPE,
 * creating and hash-caching it on first use.  Constants are never freed.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table; keys point into the temps. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_i32 tcg_constant_i32(int32_t val)
{
    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
}

TCGv_i64 tcg_constant_i64(int64_t val)
{
    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
}

TCGv_ptr tcg_constant_ptr_int(intptr_t val)
{
    return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    /* Replicate VAL across all elements of size VECE. */
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

#ifdef CONFIG_DEBUG_TCG
/* Index of TS within the current context's temps[], with bounds check. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}

/*
 * Debug decoder for the offset-encoded TCGv_i32 handle: V is the byte
 * offset of the TCGTemp from tcg_ctx, so adding it back yields the temp.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
#endif /* CONFIG_DEBUG_TCG */

/*
 * Return true if OP may appear in the opcode stream with TYPE.
 * Test the runtime variable that controls each opcode.
 */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /* First decide whether TYPE itself is available on this host. */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    switch (op) {
    /* Control-flow and bookkeeping opcodes are always available. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Type-generic opcodes: gated only on the type itself. */
    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* Mandatory 32-bit opcodes, implemented by all backends. */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* Optional 32-bit opcodes, per-target feature macros. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit opcodes, present on 64-bit hosts only. */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Baseline vector opcodes: gated only on vector type support. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    /* Optional vector opcodes, per-target feature macros. */
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Remaining generic opcodes are described by their output-op
         * descriptor; a resolvable constraint set means "supported".
         */
        if (op < INDEX_op_last_generic) {
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        tcg_debug_assert(op < NB_OPS);
        return true;

    case INDEX_op_last_generic:
        g_assert_not_reached();
    }
}

/*
 * Validate and forward a deposit request to the backend: OFS/LEN must
 * describe a non-empty bitfield wholly inside a value of TYPE's width.
 */
bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
{
    unsigned width;

    tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
    width = (type == TCG_TYPE_I32 ? 32 : 64);

    tcg_debug_assert(ofs < width);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= width - ofs);

    return TCG_TARGET_deposit_valid(type, ofs, len);
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

/*
 * Emit an INDEX_op_call to FUNC described by INFO.  RET receives the
 * return value location(s); ARGS are the input temporaries, indexed via
 * the call layout computed (once) by init_call_layout.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout for this helper exactly once. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 for the trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Outputs: 0, 1, or a 2/4-part value spread over adjacent temps. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n =
        info->nr_in;
    /* Inputs: pass directly, or widen 32-bit args to 64-bit as required. */
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants a 64-bit register; extend into a scratch temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing words: the raw function pointer and its helper info. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* Release the scratch temps used for argument extension. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

/* Fixed-arity front ends for tcg_gen_callN, 0 through 7 arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}

void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
{
    tcg_gen_callN(func, info, ret, &t1);
}

void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2)
{
    TCGTemp *args[2] = { t1, t2 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
{
    TCGTemp *args[3] = { t1, t2, t3 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
{
    TCGTemp *args[4] = { t1, t2, t3, t4 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
{
    TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
                   TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
{
    TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
                   TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
{
    TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
    tcg_gen_callN(func, info, ret, args);
}

/*
 * Reset the register allocator state at the start of a translation:
 * set each temp's initial value location by kind and clear the
 * register-to-temp map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Stack slots are (re)assigned lazily during allocation. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a human-readable name for TS into BUF, for dump output. */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        /* Constants are rendered as their (typed) value. */
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, starting from an encoded TCGArg. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Printable names for TCGCond values, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};

/* Printable names for the size/sign/endian part of a MemOp. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB] = "ub",
    [MO_SB] = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};

/* Printable names for the alignment part of a MemOp. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT] = "un+",
    [MO_ALIGN >> MO_ASHIFT] = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Printable names for the atomicity part of a MemOp. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};

/* Printable names for valid bswap flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

#ifdef CONFIG_PLUGIN
/* Printable names for plugin callback insertion points. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif

/* True if D contains at most one register. */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

/* Lowest-numbered register in D. */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? \
       ret_ : 0; })

/*
 * Dump the current opcode stream of S to F in human-readable form.
 * With HAVE_PREFS, also print register allocation preferences; when
 * liveness data is present, dead/sync argument info is appended.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start parameters. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Opcode name, with type/element-size suffix where relevant. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* Outputs then inputs, comma separated. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant argument symbolically by opcode. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Branch targets and memory barriers get special rendering. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength... */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* ...and the ordered access combination. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant arguments, printed raw. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Output args that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, " sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Args whose value dies at this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, " dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, " pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
{
    int n;

    arg_ct += k;
    n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
     */
    switch (arg_ct->pair) {
    case 1:
    case 3:
        return (k + 1) * 2;
    case 2:
        return (arg_ct->pair_index + 1) * 2 - 1;
    }

    /* Finally, sort by decreasing register count. */
    assert(n > 1);
    return -n;
}

/* sort from highest priority to lowest */
static void sort_constraints(TCGArgConstraint *a, int start, int n)
{
    int i, j;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    /* Simple selection-style sort on sort_index by priority. */
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(a, a[start + i].sort_index);
            int p2 = get_constraint_priority(a, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

/* Parsed forms of the constraint sets; empty_cts for absent opcodes. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];

/*
 * Parse every constraint-set string in constraint_sets[] into the
 * binary form in all_cts[], resolving alias ('0'-'9'), new-register
 * ('&'), and register-pair ('p'/'m') markers, then sort each set's
 * arguments by allocation priority.  Done once at startup.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: copy and cross-link. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias. */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate constant and register-class letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}

/* Return the parsed constraint set applying to OP, or empty_cts. */
static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
{
    TCGOpcode opc = op->opc;
    TCGType type = TCGOP_TYPE(op);
    unsigned flags = TCGOP_FLAGS(op);
    const TCGOpDef *def = &tcg_op_defs[opc];
    const TCGOutOp *outop = all_outop[opc];
    TCGConstraintSetIndex con_set;

    if (def->flags & TCG_OPF_NOT_PRESENT) {
        return empty_cts;
    }

    /* Prefer the outop descriptor; fall back to the legacy hook. */
    if (outop) {
        con_set = outop->static_constraint;
        if (con_set == C_Dynamic) {
            con_set = outop->dynamic_constraint(type, flags);
        }
    } else {
        con_set = tcg_target_op_def(opc, type, flags);
    }
tcg_debug_assert(con_set >= 0); 3402 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3403 3404 /* The constraint arguments must match TCGOpcode arguments. */ 3405 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3406 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3407 3408 return all_cts[con_set]; 3409 } 3410 3411 static void remove_label_use(TCGOp *op, int idx) 3412 { 3413 TCGLabel *label = arg_label(op->args[idx]); 3414 TCGLabelUse *use; 3415 3416 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3417 if (use->op == op) { 3418 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3419 return; 3420 } 3421 } 3422 g_assert_not_reached(); 3423 } 3424 3425 void tcg_op_remove(TCGContext *s, TCGOp *op) 3426 { 3427 switch (op->opc) { 3428 case INDEX_op_br: 3429 remove_label_use(op, 0); 3430 break; 3431 case INDEX_op_brcond_i32: 3432 case INDEX_op_brcond_i64: 3433 remove_label_use(op, 3); 3434 break; 3435 case INDEX_op_brcond2_i32: 3436 remove_label_use(op, 5); 3437 break; 3438 default: 3439 break; 3440 } 3441 3442 QTAILQ_REMOVE(&s->ops, op, link); 3443 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3444 s->nb_ops--; 3445 } 3446 3447 void tcg_remove_ops_after(TCGOp *op) 3448 { 3449 TCGContext *s = tcg_ctx; 3450 3451 while (true) { 3452 TCGOp *last = tcg_last_op(); 3453 if (last == op) { 3454 return; 3455 } 3456 tcg_op_remove(s, last); 3457 } 3458 } 3459 3460 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3461 { 3462 TCGContext *s = tcg_ctx; 3463 TCGOp *op = NULL; 3464 3465 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3466 QTAILQ_FOREACH(op, &s->free_ops, link) { 3467 if (nargs <= op->nargs) { 3468 QTAILQ_REMOVE(&s->free_ops, op, link); 3469 nargs = op->nargs; 3470 goto found; 3471 } 3472 } 3473 } 3474 3475 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
*/ 3476 nargs = MAX(4, nargs); 3477 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3478 3479 found: 3480 memset(op, 0, offsetof(TCGOp, link)); 3481 op->opc = opc; 3482 op->nargs = nargs; 3483 3484 /* Check for bitfield overflow. */ 3485 tcg_debug_assert(op->nargs == nargs); 3486 3487 s->nb_ops++; 3488 return op; 3489 } 3490 3491 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3492 { 3493 TCGOp *op = tcg_op_alloc(opc, nargs); 3494 3495 if (tcg_ctx->emit_before_op) { 3496 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3497 } else { 3498 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3499 } 3500 return op; 3501 } 3502 3503 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3504 TCGOpcode opc, TCGType type, unsigned nargs) 3505 { 3506 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3507 3508 TCGOP_TYPE(new_op) = type; 3509 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3510 return new_op; 3511 } 3512 3513 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3514 TCGOpcode opc, TCGType type, unsigned nargs) 3515 { 3516 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3517 3518 TCGOP_TYPE(new_op) = type; 3519 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3520 return new_op; 3521 } 3522 3523 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3524 { 3525 TCGLabelUse *u; 3526 3527 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3528 TCGOp *op = u->op; 3529 switch (op->opc) { 3530 case INDEX_op_br: 3531 op->args[0] = label_arg(to); 3532 break; 3533 case INDEX_op_brcond_i32: 3534 case INDEX_op_brcond_i64: 3535 op->args[3] = label_arg(to); 3536 break; 3537 case INDEX_op_brcond2_i32: 3538 op->args[5] = label_arg(to); 3539 break; 3540 default: 3541 g_assert_not_reached(); 3542 } 3543 } 3544 3545 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3546 } 3547 3548 /* Reachable analysis : remove unreachable code. 
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    /* True while we are scanning ops that follow an unconditional exit. */
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Liveness state bits: the temp's value is dead / resides in memory. */
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals: dead, but their canonical value lives in memory. */
    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    /* Non-global temps: simply dead. */
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory.
*/ 3679 static void la_bb_end(TCGContext *s, int ng, int nt) 3680 { 3681 int i; 3682 3683 for (i = 0; i < nt; ++i) { 3684 TCGTemp *ts = &s->temps[i]; 3685 int state; 3686 3687 switch (ts->kind) { 3688 case TEMP_FIXED: 3689 case TEMP_GLOBAL: 3690 case TEMP_TB: 3691 state = TS_DEAD | TS_MEM; 3692 break; 3693 case TEMP_EBB: 3694 case TEMP_CONST: 3695 state = TS_DEAD; 3696 break; 3697 default: 3698 g_assert_not_reached(); 3699 } 3700 ts->state = state; 3701 la_reset_pref(ts); 3702 } 3703 } 3704 3705 /* liveness analysis: sync globals back to memory. */ 3706 static void la_global_sync(TCGContext *s, int ng) 3707 { 3708 int i; 3709 3710 for (i = 0; i < ng; ++i) { 3711 int state = s->temps[i].state; 3712 s->temps[i].state = state | TS_MEM; 3713 if (state == TS_DEAD) { 3714 /* If the global was previously dead, reset prefs. */ 3715 la_reset_pref(&s->temps[i]); 3716 } 3717 } 3718 } 3719 3720 /* 3721 * liveness analysis: conditional branch: all temps are dead unless 3722 * explicitly live-across-conditional-branch, globals and local temps 3723 * should be synced. 3724 */ 3725 static void la_bb_sync(TCGContext *s, int ng, int nt) 3726 { 3727 la_global_sync(s, ng); 3728 3729 for (int i = ng; i < nt; ++i) { 3730 TCGTemp *ts = &s->temps[i]; 3731 int state; 3732 3733 switch (ts->kind) { 3734 case TEMP_TB: 3735 state = ts->state; 3736 ts->state = state | TS_MEM; 3737 if (state != TS_DEAD) { 3738 continue; 3739 } 3740 break; 3741 case TEMP_EBB: 3742 case TEMP_CONST: 3743 continue; 3744 default: 3745 g_assert_not_reached(); 3746 } 3747 la_reset_pref(&s->temps[i]); 3748 } 3749 } 3750 3751 /* liveness analysis: sync globals back to memory and kill. */ 3752 static void la_global_kill(TCGContext *s, int ng) 3753 { 3754 int i; 3755 3756 for (i = 0; i < ng; i++) { 3757 s->temps[i].state = TS_DEAD | TS_MEM; 3758 la_reset_pref(&s->temps[i]); 3759 } 3760 } 3761 3762 /* liveness analysis: note live globals crossing calls. 
 */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel: temp is used in more than one EBB. */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, accessed via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops backwards so that uses are seen before definitions. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i32:
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the source and drop
                           the now-useless mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}

/*
 * Assign a stack-frame slot to @ts (and, for a subdivided base type,
 * to all of its sibling parts).  Raises TB overflow if the frame is
 * exhausted, restarting with a smaller TB.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type.  */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts.  */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[].
*/ 4448 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4449 { 4450 if (ts->val_type == TEMP_VAL_REG) { 4451 TCGReg old = ts->reg; 4452 tcg_debug_assert(s->reg_to_temp[old] == ts); 4453 if (old == reg) { 4454 return; 4455 } 4456 s->reg_to_temp[old] = NULL; 4457 } 4458 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4459 s->reg_to_temp[reg] = ts; 4460 ts->val_type = TEMP_VAL_REG; 4461 ts->reg = reg; 4462 } 4463 4464 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4465 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4466 { 4467 tcg_debug_assert(type != TEMP_VAL_REG); 4468 if (ts->val_type == TEMP_VAL_REG) { 4469 TCGReg reg = ts->reg; 4470 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4471 s->reg_to_temp[reg] = NULL; 4472 } 4473 ts->val_type = type; 4474 } 4475 4476 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4477 4478 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4479 mark it free; otherwise mark it dead. */ 4480 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4481 { 4482 TCGTempVal new_type; 4483 4484 switch (ts->kind) { 4485 case TEMP_FIXED: 4486 return; 4487 case TEMP_GLOBAL: 4488 case TEMP_TB: 4489 new_type = TEMP_VAL_MEM; 4490 break; 4491 case TEMP_EBB: 4492 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4493 break; 4494 case TEMP_CONST: 4495 new_type = TEMP_VAL_CONST; 4496 break; 4497 default: 4498 g_assert_not_reached(); 4499 } 4500 set_temp_val_nonreg(s, ts, new_type); 4501 } 4502 4503 /* Mark a temporary as dead. */ 4504 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4505 { 4506 temp_free_or_dead(s, ts, 1); 4507 } 4508 4509 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4510 registers needs to be allocated to store a constant. 
If 'free_or_dead' 4511 is non-zero, subsequently release the temporary; if it is positive, the 4512 temp is dead; if it is negative, the temp is free. */ 4513 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4514 TCGRegSet preferred_regs, int free_or_dead) 4515 { 4516 if (!temp_readonly(ts) && !ts->mem_coherent) { 4517 if (!ts->mem_allocated) { 4518 temp_allocate_frame(s, ts); 4519 } 4520 switch (ts->val_type) { 4521 case TEMP_VAL_CONST: 4522 /* If we're going to free the temp immediately, then we won't 4523 require it later in a register, so attempt to store the 4524 constant to memory directly. */ 4525 if (free_or_dead 4526 && tcg_out_sti(s, ts->type, ts->val, 4527 ts->mem_base->reg, ts->mem_offset)) { 4528 break; 4529 } 4530 temp_load(s, ts, tcg_target_available_regs[ts->type], 4531 allocated_regs, preferred_regs); 4532 /* fallthrough */ 4533 4534 case TEMP_VAL_REG: 4535 tcg_out_st(s, ts->type, ts->reg, 4536 ts->mem_base->reg, ts->mem_offset); 4537 break; 4538 4539 case TEMP_VAL_MEM: 4540 break; 4541 4542 case TEMP_VAL_DEAD: 4543 default: 4544 g_assert_not_reached(); 4545 } 4546 ts->mem_coherent = 1; 4547 } 4548 if (free_or_dead) { 4549 temp_free_or_dead(s, ts, free_or_dead); 4550 } 4551 } 4552 4553 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4554 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4555 { 4556 TCGTemp *ts = s->reg_to_temp[reg]; 4557 if (ts != NULL) { 4558 temp_sync(s, ts, allocated_regs, 0, -1); 4559 } 4560 } 4561 4562 /** 4563 * tcg_reg_alloc: 4564 * @required_regs: Set of registers in which we must allocate. 4565 * @allocated_regs: Set of registers which must be avoided. 4566 * @preferred_regs: Set of registers we should prefer. 4567 * @rev: True if we search the registers in "indirect" order. 4568 * 4569 * The allocated register must be in @required_regs & ~@allocated_regs, 4570 * but if we can put it in @preferred_regs we may save a move later. 
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1]: all acceptable registers; reg_ct[0]: preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    g_assert_not_reached();
}

/*
 * As tcg_reg_alloc(), but allocate two adjacent registers R and R+1,
 * returning the lower register R.  A set bit in @required_regs appears
 * to mark the first register of an acceptable pair -- TODO confirm
 * against the constraint encoding.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of free registers in the pair. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}

/* Make sure the temporary is in a register. If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            /* Vector constant: materialize via dup of an element. */
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The memory slot (if any) does not hold this constant. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Liveness should have synced each global already; assert it. */
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
4802 */ 4803 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4804 { 4805 sync_globals(s, allocated_regs); 4806 4807 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4808 TCGTemp *ts = &s->temps[i]; 4809 /* 4810 * The liveness analysis already ensures that temps are dead. 4811 * Keep tcg_debug_asserts for safety. 4812 */ 4813 switch (ts->kind) { 4814 case TEMP_TB: 4815 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4816 break; 4817 case TEMP_EBB: 4818 case TEMP_CONST: 4819 break; 4820 default: 4821 g_assert_not_reached(); 4822 } 4823 } 4824 } 4825 4826 /* 4827 * Specialized code generation for INDEX_op_mov_* with a constant. 4828 */ 4829 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4830 tcg_target_ulong val, TCGLifeData arg_life, 4831 TCGRegSet preferred_regs) 4832 { 4833 /* ENV should not be modified. */ 4834 tcg_debug_assert(!temp_readonly(ots)); 4835 4836 /* The movi is not explicitly generated here. */ 4837 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4838 ots->val = val; 4839 ots->mem_coherent = 0; 4840 if (NEED_SYNC_ARG(0)) { 4841 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4842 } else if (IS_DEAD_ARG(0)) { 4843 temp_dead(s, ots); 4844 } 4845 } 4846 4847 /* 4848 * Specialized code generation for INDEX_op_mov_*. 4849 */ 4850 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4851 { 4852 const TCGLifeData arg_life = op->life; 4853 TCGRegSet allocated_regs, preferred_regs; 4854 TCGTemp *ts, *ots; 4855 TCGType otype, itype; 4856 TCGReg oreg, ireg; 4857 4858 allocated_regs = s->reserved_regs; 4859 preferred_regs = output_pref(op, 0); 4860 ots = arg_temp(op->args[0]); 4861 ts = arg_temp(op->args[1]); 4862 4863 /* ENV should not be modified. */ 4864 tcg_debug_assert(!temp_readonly(ots)); 4865 4866 /* Note that otype != itype for no-op truncation. 
*/ 4867 otype = ots->type; 4868 itype = ts->type; 4869 4870 if (ts->val_type == TEMP_VAL_CONST) { 4871 /* propagate constant or generate sti */ 4872 tcg_target_ulong val = ts->val; 4873 if (IS_DEAD_ARG(1)) { 4874 temp_dead(s, ts); 4875 } 4876 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4877 return; 4878 } 4879 4880 /* If the source value is in memory we're going to be forced 4881 to have it in a register in order to perform the copy. Copy 4882 the SOURCE value into its own register first, that way we 4883 don't have to reload SOURCE the next time it is used. */ 4884 if (ts->val_type == TEMP_VAL_MEM) { 4885 temp_load(s, ts, tcg_target_available_regs[itype], 4886 allocated_regs, preferred_regs); 4887 } 4888 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4889 ireg = ts->reg; 4890 4891 if (IS_DEAD_ARG(0)) { 4892 /* mov to a non-saved dead register makes no sense (even with 4893 liveness analysis disabled). */ 4894 tcg_debug_assert(NEED_SYNC_ARG(0)); 4895 if (!ots->mem_allocated) { 4896 temp_allocate_frame(s, ots); 4897 } 4898 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4899 if (IS_DEAD_ARG(1)) { 4900 temp_dead(s, ts); 4901 } 4902 temp_dead(s, ots); 4903 return; 4904 } 4905 4906 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4907 /* 4908 * The mov can be suppressed. Kill input first, so that it 4909 * is unlinked from reg_to_temp, then set the output to the 4910 * reg that we saved from the input. 4911 */ 4912 temp_dead(s, ts); 4913 oreg = ireg; 4914 } else { 4915 if (ots->val_type == TEMP_VAL_REG) { 4916 oreg = ots->reg; 4917 } else { 4918 /* Make sure to not spill the input register during allocation. */ 4919 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4920 allocated_regs | ((TCGRegSet)1 << ireg), 4921 preferred_regs, ots->indirect_base); 4922 } 4923 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4924 /* 4925 * Cross register class move not supported. 
4926 * Store the source register into the destination slot 4927 * and leave the destination temp as TEMP_VAL_MEM. 4928 */ 4929 assert(!temp_readonly(ots)); 4930 if (!ts->mem_allocated) { 4931 temp_allocate_frame(s, ots); 4932 } 4933 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4934 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4935 ots->mem_coherent = 1; 4936 return; 4937 } 4938 } 4939 set_temp_val_reg(s, ots, oreg); 4940 ots->mem_coherent = 0; 4941 4942 if (NEED_SYNC_ARG(0)) { 4943 temp_sync(s, ots, allocated_regs, 0, 0); 4944 } 4945 } 4946 4947 /* 4948 * Specialized code generation for INDEX_op_dup_vec. 4949 */ 4950 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4951 { 4952 const TCGLifeData arg_life = op->life; 4953 TCGRegSet dup_out_regs, dup_in_regs; 4954 const TCGArgConstraint *dup_args_ct; 4955 TCGTemp *its, *ots; 4956 TCGType itype, vtype; 4957 unsigned vece; 4958 int lowpart_ofs; 4959 bool ok; 4960 4961 ots = arg_temp(op->args[0]); 4962 its = arg_temp(op->args[1]); 4963 4964 /* ENV should not be modified. */ 4965 tcg_debug_assert(!temp_readonly(ots)); 4966 4967 itype = its->type; 4968 vece = TCGOP_VECE(op); 4969 vtype = TCGOP_TYPE(op); 4970 4971 if (its->val_type == TEMP_VAL_CONST) { 4972 /* Propagate constant via movi -> dupi. */ 4973 tcg_target_ulong val = its->val; 4974 if (IS_DEAD_ARG(1)) { 4975 temp_dead(s, its); 4976 } 4977 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4978 return; 4979 } 4980 4981 dup_args_ct = opcode_args_ct(op); 4982 dup_out_regs = dup_args_ct[0].regs; 4983 dup_in_regs = dup_args_ct[1].regs; 4984 4985 /* Allocate the output register now. */ 4986 if (ots->val_type != TEMP_VAL_REG) { 4987 TCGRegSet allocated_regs = s->reserved_regs; 4988 TCGReg oreg; 4989 4990 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4991 /* Make sure to not spill the input register. 
 */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory. Attempt an integer-vector
             * register move first. We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts the low element is at the end of the slot. */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Register allocation and code emission for a single non-call op:
 * satisfy input constraints, allocate outputs, emit the instruction,
 * then sync/free arguments per the op's liveness data.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *args_ct;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];
    TCGCond op_cond;

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* Extract the comparison condition, if the opcode carries one. */
    switch (op->opc) {
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode.
 */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    args_ct = opcode_args_ct(op);

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        i = args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST) {
#ifdef TCG_REG_ZERO
            if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
                /* Hardware zero register: indicate register via non-const. */
                const_args[i] = 0;
                new_args[i] = TCG_REG_ZERO;
                continue;
            }
#endif

            if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                       op_cond, TCGOP_VECE(op))) {
                /* constant is OK for instruction */
                const_args[i] = 1;
                new_args[i] = ts->val;
                continue;
            }
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself. If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The loaded register may still violate the constraint. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /*
                 * The pair may be reused in place only if both halves are
                 * dead, writable, already adjacent in registers, and the
                 * registers are unclaimed by other inputs.
                 */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Reserve the unseen first half of the pair. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            /* This input occupies the second half of the pair. */
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported. Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ?
 */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A "new" output must avoid all input registers too. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            /* The register will be written by the op below. */
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    TCGType type = TCGOP_TYPE(op);
    switch (op->opc) {
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;

    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_andc:
    case INDEX_op_eqv:
    case INDEX_op_mul:
    case INDEX_op_muluh:
    case INDEX_op_nand:
    case INDEX_op_nor:
    case INDEX_op_or:
    case INDEX_op_orc:
    case INDEX_op_xor:
        {
            const TCGOutOpBinary *out =
                container_of(all_outop[op->opc], TCGOutOpBinary, base);

            /* Constants should never appear in the first source operand. */
            tcg_debug_assert(!const_args[1]);
            if (const_args[2]) {
                out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_sub:
        {
            const TCGOutOpSubtract *out = &outop_sub;

            /*
             * Constants should never appear in the second source operand.
             * These are folded to add with negative constant.
             */
            tcg_debug_assert(!const_args[2]);
            if (const_args[1]) {
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_neg:
    case INDEX_op_not:
        {
            const TCGOutOpUnary *out =
                container_of(all_outop[op->opc], TCGOutOpUnary, base);

            /* Constants should have been folded. */
            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1]);
        }
        break;

    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
                           TCGOP_VECE(op), new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, type, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

/*
 * Allocate and emit INDEX_op_dup2_vec (32-bit hosts only).
 * Returns false if the generic expansion must be used instead.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers.
 */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Find the smallest element size that replicates to the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* 'its' is the temp holding the low-addressed half. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

/* Place call argument @ts into the fixed argument register @reg. */
static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported. Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

/* Place call argument @ts into stack slot @arg_slot. */
static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               arg_slot_stk_ofs(arg_slot));
}

/* Place call argument @ts per its location @l: register or stack slot. */
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (arg_slot_reg_p(l->arg_slot)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
    }
}

/* Pass the address @ref_base + @ref_off as the argument in @arg_slot. */
static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;

    if (arg_slot_reg_p(arg_slot)) {
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        /* Compute the address in a scratch register, then spill it. */
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   arg_slot_stk_ofs(arg_slot));
    }
}

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
5671 */ 5672 for (i = nb_iargs - 1; i >= 0; --i) { 5673 const TCGCallArgumentLoc *loc = &info->in[i]; 5674 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5675 5676 switch (loc->kind) { 5677 case TCG_CALL_ARG_NORMAL: 5678 case TCG_CALL_ARG_EXTEND_U: 5679 case TCG_CALL_ARG_EXTEND_S: 5680 load_arg_normal(s, loc, ts, &allocated_regs); 5681 break; 5682 case TCG_CALL_ARG_BY_REF: 5683 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5684 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5685 arg_slot_stk_ofs(loc->ref_slot), 5686 &allocated_regs); 5687 break; 5688 case TCG_CALL_ARG_BY_REF_N: 5689 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5690 break; 5691 default: 5692 g_assert_not_reached(); 5693 } 5694 } 5695 5696 /* Mark dead temporaries and free the associated registers. */ 5697 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5698 if (IS_DEAD_ARG(i)) { 5699 temp_dead(s, arg_temp(op->args[i])); 5700 } 5701 } 5702 5703 /* Clobber call registers. */ 5704 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5705 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5706 tcg_reg_free(s, i, allocated_regs); 5707 } 5708 } 5709 5710 /* 5711 * Save globals if they might be written by the helper, 5712 * sync them if they might be read. 5713 */ 5714 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5715 /* Nothing to do */ 5716 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5717 sync_globals(s, allocated_regs); 5718 } else { 5719 save_globals(s, allocated_regs); 5720 } 5721 5722 /* 5723 * If the ABI passes a pointer to the returned struct as the first 5724 * argument, load that now. Pass a pointer to the output home slot. 
5725 */ 5726 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5727 TCGTemp *ts = arg_temp(op->args[0]); 5728 5729 if (!ts->mem_allocated) { 5730 temp_allocate_frame(s, ts); 5731 } 5732 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5733 } 5734 5735 tcg_out_call(s, tcg_call_func(op), info); 5736 5737 /* Assign output registers and emit moves if needed. */ 5738 switch (info->out_kind) { 5739 case TCG_CALL_RET_NORMAL: 5740 for (i = 0; i < nb_oargs; i++) { 5741 TCGTemp *ts = arg_temp(op->args[i]); 5742 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5743 5744 /* ENV should not be modified. */ 5745 tcg_debug_assert(!temp_readonly(ts)); 5746 5747 set_temp_val_reg(s, ts, reg); 5748 ts->mem_coherent = 0; 5749 } 5750 break; 5751 5752 case TCG_CALL_RET_BY_VEC: 5753 { 5754 TCGTemp *ts = arg_temp(op->args[0]); 5755 5756 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5757 tcg_debug_assert(ts->temp_subindex == 0); 5758 if (!ts->mem_allocated) { 5759 temp_allocate_frame(s, ts); 5760 } 5761 tcg_out_st(s, TCG_TYPE_V128, 5762 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5763 ts->mem_base->reg, ts->mem_offset); 5764 } 5765 /* fall through to mark all parts in memory */ 5766 5767 case TCG_CALL_RET_BY_REF: 5768 /* The callee has performed a write through the reference. */ 5769 for (i = 0; i < nb_oargs; i++) { 5770 TCGTemp *ts = arg_temp(op->args[i]); 5771 ts->val_type = TEMP_VAL_MEM; 5772 } 5773 break; 5774 5775 default: 5776 g_assert_not_reached(); 5777 } 5778 5779 /* Flush or discard output registers as needed. 
*/ 5780 for (i = 0; i < nb_oargs; i++) { 5781 TCGTemp *ts = arg_temp(op->args[i]); 5782 if (NEED_SYNC_ARG(i)) { 5783 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5784 } else if (IS_DEAD_ARG(i)) { 5785 temp_dead(s, ts); 5786 } 5787 } 5788 } 5789 5790 /** 5791 * atom_and_align_for_opc: 5792 * @s: tcg context 5793 * @opc: memory operation code 5794 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5795 * @allow_two_ops: true if we are prepared to issue two operations 5796 * 5797 * Return the alignment and atomicity to use for the inline fast path 5798 * for the given memory operation. The alignment may be larger than 5799 * that specified in @opc, and the correct alignment will be diagnosed 5800 * by the slow path helper. 5801 * 5802 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5803 * and issue two loads or stores for subalignment. 5804 */ 5805 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5806 MemOp host_atom, bool allow_two_ops) 5807 { 5808 MemOp align = memop_alignment_bits(opc); 5809 MemOp size = opc & MO_SIZE; 5810 MemOp half = size ? size - 1 : 0; 5811 MemOp atom = opc & MO_ATOM_MASK; 5812 MemOp atmax; 5813 5814 switch (atom) { 5815 case MO_ATOM_NONE: 5816 /* The operation requires no specific atomicity. */ 5817 atmax = MO_8; 5818 break; 5819 5820 case MO_ATOM_IFALIGN: 5821 atmax = size; 5822 break; 5823 5824 case MO_ATOM_IFALIGN_PAIR: 5825 atmax = half; 5826 break; 5827 5828 case MO_ATOM_WITHIN16: 5829 atmax = size; 5830 if (size == MO_128) { 5831 /* Misalignment implies !within16, and therefore no atomicity. */ 5832 } else if (host_atom != MO_ATOM_WITHIN16) { 5833 /* The host does not implement within16, so require alignment. */ 5834 align = MAX(align, size); 5835 } 5836 break; 5837 5838 case MO_ATOM_WITHIN16_PAIR: 5839 atmax = size; 5840 /* 5841 * Misalignment implies !within16, and therefore half atomicity. 
5842 * Any host prepared for two operations can implement this with 5843 * half alignment. 5844 */ 5845 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5846 align = MAX(align, half); 5847 } 5848 break; 5849 5850 case MO_ATOM_SUBALIGN: 5851 atmax = size; 5852 if (host_atom != MO_ATOM_SUBALIGN) { 5853 /* If unaligned but not odd, there are subobjects up to half. */ 5854 if (allow_two_ops) { 5855 align = MAX(align, half); 5856 } else { 5857 align = MAX(align, size); 5858 } 5859 } 5860 break; 5861 5862 default: 5863 g_assert_not_reached(); 5864 } 5865 5866 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5867 } 5868 5869 /* 5870 * Similarly for qemu_ld/st slow path helpers. 5871 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5872 * using only the provided backend tcg_out_* functions. 5873 */ 5874 5875 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5876 { 5877 int ofs = arg_slot_stk_ofs(slot); 5878 5879 /* 5880 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5881 * require extension to uint64_t, adjust the address for uint32_t. 5882 */ 5883 if (HOST_BIG_ENDIAN && 5884 TCG_TARGET_REG_BITS == 64 && 5885 type == TCG_TYPE_I32) { 5886 ofs += 4; 5887 } 5888 return ofs; 5889 } 5890 5891 static void tcg_out_helper_load_slots(TCGContext *s, 5892 unsigned nmov, TCGMovExtend *mov, 5893 const TCGLdstHelperParam *parm) 5894 { 5895 unsigned i; 5896 TCGReg dst3; 5897 5898 /* 5899 * Start from the end, storing to the stack first. 5900 * This frees those registers, so we need not consider overlap. 5901 */ 5902 for (i = nmov; i-- > 0; ) { 5903 unsigned slot = mov[i].dst; 5904 5905 if (arg_slot_reg_p(slot)) { 5906 goto found_reg; 5907 } 5908 5909 TCGReg src = mov[i].src; 5910 TCGType dst_type = mov[i].dst_type; 5911 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5912 5913 /* The argument is going onto the stack; extend into scratch. 
*/ 5914 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5915 tcg_debug_assert(parm->ntmp != 0); 5916 mov[i].dst = src = parm->tmp[0]; 5917 tcg_out_movext1(s, &mov[i]); 5918 } 5919 5920 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5921 tcg_out_helper_stk_ofs(dst_type, slot)); 5922 } 5923 return; 5924 5925 found_reg: 5926 /* 5927 * The remaining arguments are in registers. 5928 * Convert slot numbers to argument registers. 5929 */ 5930 nmov = i + 1; 5931 for (i = 0; i < nmov; ++i) { 5932 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5933 } 5934 5935 switch (nmov) { 5936 case 4: 5937 /* The backend must have provided enough temps for the worst case. */ 5938 tcg_debug_assert(parm->ntmp >= 2); 5939 5940 dst3 = mov[3].dst; 5941 for (unsigned j = 0; j < 3; ++j) { 5942 if (dst3 == mov[j].src) { 5943 /* 5944 * Conflict. Copy the source to a temporary, perform the 5945 * remaining moves, then the extension from our scratch 5946 * on the way out. 5947 */ 5948 TCGReg scratch = parm->tmp[1]; 5949 5950 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5951 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5952 tcg_out_movext1_new_src(s, &mov[3], scratch); 5953 break; 5954 } 5955 } 5956 5957 /* No conflicts: perform this move and continue. */ 5958 tcg_out_movext1(s, &mov[3]); 5959 /* fall through */ 5960 5961 case 3: 5962 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5963 parm->ntmp ? parm->tmp[0] : -1); 5964 break; 5965 case 2: 5966 tcg_out_movext2(s, mov, mov + 1, 5967 parm->ntmp ? 
parm->tmp[0] : -1); 5968 break; 5969 case 1: 5970 tcg_out_movext1(s, mov); 5971 break; 5972 default: 5973 g_assert_not_reached(); 5974 } 5975 } 5976 5977 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5978 TCGType type, tcg_target_long imm, 5979 const TCGLdstHelperParam *parm) 5980 { 5981 if (arg_slot_reg_p(slot)) { 5982 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5983 } else { 5984 int ofs = tcg_out_helper_stk_ofs(type, slot); 5985 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5986 tcg_debug_assert(parm->ntmp != 0); 5987 tcg_out_movi(s, type, parm->tmp[0], imm); 5988 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5989 } 5990 } 5991 } 5992 5993 static void tcg_out_helper_load_common_args(TCGContext *s, 5994 const TCGLabelQemuLdst *ldst, 5995 const TCGLdstHelperParam *parm, 5996 const TCGHelperInfo *info, 5997 unsigned next_arg) 5998 { 5999 TCGMovExtend ptr_mov = { 6000 .dst_type = TCG_TYPE_PTR, 6001 .src_type = TCG_TYPE_PTR, 6002 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6003 }; 6004 const TCGCallArgumentLoc *loc = &info->in[0]; 6005 TCGType type; 6006 unsigned slot; 6007 tcg_target_ulong imm; 6008 6009 /* 6010 * Handle env, which is always first. 6011 */ 6012 ptr_mov.dst = loc->arg_slot; 6013 ptr_mov.src = TCG_AREG0; 6014 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6015 6016 /* 6017 * Handle oi. 6018 */ 6019 imm = ldst->oi; 6020 loc = &info->in[next_arg]; 6021 type = TCG_TYPE_I32; 6022 switch (loc->kind) { 6023 case TCG_CALL_ARG_NORMAL: 6024 break; 6025 case TCG_CALL_ARG_EXTEND_U: 6026 case TCG_CALL_ARG_EXTEND_S: 6027 /* No extension required for MemOpIdx. */ 6028 tcg_debug_assert(imm <= INT32_MAX); 6029 type = TCG_TYPE_REG; 6030 break; 6031 default: 6032 g_assert_not_reached(); 6033 } 6034 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6035 next_arg++; 6036 6037 /* 6038 * Handle ra. 
6039 */ 6040 loc = &info->in[next_arg]; 6041 slot = loc->arg_slot; 6042 if (parm->ra_gen) { 6043 int arg_reg = -1; 6044 TCGReg ra_reg; 6045 6046 if (arg_slot_reg_p(slot)) { 6047 arg_reg = tcg_target_call_iarg_regs[slot]; 6048 } 6049 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6050 6051 ptr_mov.dst = slot; 6052 ptr_mov.src = ra_reg; 6053 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6054 } else { 6055 imm = (uintptr_t)ldst->raddr; 6056 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6057 } 6058 } 6059 6060 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6061 const TCGCallArgumentLoc *loc, 6062 TCGType dst_type, TCGType src_type, 6063 TCGReg lo, TCGReg hi) 6064 { 6065 MemOp reg_mo; 6066 6067 if (dst_type <= TCG_TYPE_REG) { 6068 MemOp src_ext; 6069 6070 switch (loc->kind) { 6071 case TCG_CALL_ARG_NORMAL: 6072 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6073 break; 6074 case TCG_CALL_ARG_EXTEND_U: 6075 dst_type = TCG_TYPE_REG; 6076 src_ext = MO_UL; 6077 break; 6078 case TCG_CALL_ARG_EXTEND_S: 6079 dst_type = TCG_TYPE_REG; 6080 src_ext = MO_SL; 6081 break; 6082 default: 6083 g_assert_not_reached(); 6084 } 6085 6086 mov[0].dst = loc->arg_slot; 6087 mov[0].dst_type = dst_type; 6088 mov[0].src = lo; 6089 mov[0].src_type = src_type; 6090 mov[0].src_ext = src_ext; 6091 return 1; 6092 } 6093 6094 if (TCG_TARGET_REG_BITS == 32) { 6095 assert(dst_type == TCG_TYPE_I64); 6096 reg_mo = MO_32; 6097 } else { 6098 assert(dst_type == TCG_TYPE_I128); 6099 reg_mo = MO_64; 6100 } 6101 6102 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6103 mov[0].src = lo; 6104 mov[0].dst_type = TCG_TYPE_REG; 6105 mov[0].src_type = TCG_TYPE_REG; 6106 mov[0].src_ext = reg_mo; 6107 6108 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6109 mov[1].src = hi; 6110 mov[1].dst_type = TCG_TYPE_REG; 6111 mov[1].src_type = TCG_TYPE_REG; 6112 mov[1].src_ext = reg_mo; 6113 6114 return 2; 6115 } 6116 6117 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6118 const 
TCGLdstHelperParam *parm) 6119 { 6120 const TCGHelperInfo *info; 6121 const TCGCallArgumentLoc *loc; 6122 TCGMovExtend mov[2]; 6123 unsigned next_arg, nmov; 6124 MemOp mop = get_memop(ldst->oi); 6125 6126 switch (mop & MO_SIZE) { 6127 case MO_8: 6128 case MO_16: 6129 case MO_32: 6130 info = &info_helper_ld32_mmu; 6131 break; 6132 case MO_64: 6133 info = &info_helper_ld64_mmu; 6134 break; 6135 case MO_128: 6136 info = &info_helper_ld128_mmu; 6137 break; 6138 default: 6139 g_assert_not_reached(); 6140 } 6141 6142 /* Defer env argument. */ 6143 next_arg = 1; 6144 6145 loc = &info->in[next_arg]; 6146 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6147 /* 6148 * 32-bit host with 32-bit guest: zero-extend the guest address 6149 * to 64-bits for the helper by storing the low part, then 6150 * load a zero for the high part. 6151 */ 6152 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6153 TCG_TYPE_I32, TCG_TYPE_I32, 6154 ldst->addr_reg, -1); 6155 tcg_out_helper_load_slots(s, 1, mov, parm); 6156 6157 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6158 TCG_TYPE_I32, 0, parm); 6159 next_arg += 2; 6160 } else { 6161 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6162 ldst->addr_reg, -1); 6163 tcg_out_helper_load_slots(s, nmov, mov, parm); 6164 next_arg += nmov; 6165 } 6166 6167 switch (info->out_kind) { 6168 case TCG_CALL_RET_NORMAL: 6169 case TCG_CALL_RET_BY_VEC: 6170 break; 6171 case TCG_CALL_RET_BY_REF: 6172 /* 6173 * The return reference is in the first argument slot. 6174 * We need memory in which to return: re-use the top of stack. 
6175 */ 6176 { 6177 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6178 6179 if (arg_slot_reg_p(0)) { 6180 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6181 TCG_REG_CALL_STACK, ofs_slot0); 6182 } else { 6183 tcg_debug_assert(parm->ntmp != 0); 6184 tcg_out_addi_ptr(s, parm->tmp[0], 6185 TCG_REG_CALL_STACK, ofs_slot0); 6186 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6187 TCG_REG_CALL_STACK, ofs_slot0); 6188 } 6189 } 6190 break; 6191 default: 6192 g_assert_not_reached(); 6193 } 6194 6195 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6196 } 6197 6198 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6199 bool load_sign, 6200 const TCGLdstHelperParam *parm) 6201 { 6202 MemOp mop = get_memop(ldst->oi); 6203 TCGMovExtend mov[2]; 6204 int ofs_slot0; 6205 6206 switch (ldst->type) { 6207 case TCG_TYPE_I64: 6208 if (TCG_TARGET_REG_BITS == 32) { 6209 break; 6210 } 6211 /* fall through */ 6212 6213 case TCG_TYPE_I32: 6214 mov[0].dst = ldst->datalo_reg; 6215 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6216 mov[0].dst_type = ldst->type; 6217 mov[0].src_type = TCG_TYPE_REG; 6218 6219 /* 6220 * If load_sign, then we allowed the helper to perform the 6221 * appropriate sign extension to tcg_target_ulong, and all 6222 * we need now is a plain move. 6223 * 6224 * If they do not, then we expect the relevant extension 6225 * instruction to be no more expensive than a move, and 6226 * we thus save the icache etc by only using one of two 6227 * helper functions. 
6228 */ 6229 if (load_sign || !(mop & MO_SIGN)) { 6230 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6231 mov[0].src_ext = MO_32; 6232 } else { 6233 mov[0].src_ext = MO_64; 6234 } 6235 } else { 6236 mov[0].src_ext = mop & MO_SSIZE; 6237 } 6238 tcg_out_movext1(s, mov); 6239 return; 6240 6241 case TCG_TYPE_I128: 6242 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6243 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6244 switch (TCG_TARGET_CALL_RET_I128) { 6245 case TCG_CALL_RET_NORMAL: 6246 break; 6247 case TCG_CALL_RET_BY_VEC: 6248 tcg_out_st(s, TCG_TYPE_V128, 6249 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6250 TCG_REG_CALL_STACK, ofs_slot0); 6251 /* fall through */ 6252 case TCG_CALL_RET_BY_REF: 6253 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6254 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6255 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6256 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6257 return; 6258 default: 6259 g_assert_not_reached(); 6260 } 6261 break; 6262 6263 default: 6264 g_assert_not_reached(); 6265 } 6266 6267 mov[0].dst = ldst->datalo_reg; 6268 mov[0].src = 6269 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6270 mov[0].dst_type = TCG_TYPE_REG; 6271 mov[0].src_type = TCG_TYPE_REG; 6272 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6273 6274 mov[1].dst = ldst->datahi_reg; 6275 mov[1].src = 6276 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6277 mov[1].dst_type = TCG_TYPE_REG; 6278 mov[1].src_type = TCG_TYPE_REG; 6279 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6280 6281 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6282 } 6283 6284 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6285 const TCGLdstHelperParam *parm) 6286 { 6287 const TCGHelperInfo *info; 6288 const TCGCallArgumentLoc *loc; 6289 TCGMovExtend mov[4]; 6290 TCGType data_type; 6291 unsigned next_arg, nmov, n; 6292 MemOp mop = get_memop(ldst->oi); 6293 6294 switch (mop & MO_SIZE) { 6295 case MO_8: 6296 case MO_16: 6297 case MO_32: 6298 info = &info_helper_st32_mmu; 6299 data_type = TCG_TYPE_I32; 6300 break; 6301 case MO_64: 6302 info = &info_helper_st64_mmu; 6303 data_type = TCG_TYPE_I64; 6304 break; 6305 case MO_128: 6306 info = &info_helper_st128_mmu; 6307 data_type = TCG_TYPE_I128; 6308 break; 6309 default: 6310 g_assert_not_reached(); 6311 } 6312 6313 /* Defer env argument. */ 6314 next_arg = 1; 6315 nmov = 0; 6316 6317 /* Handle addr argument. */ 6318 loc = &info->in[next_arg]; 6319 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6320 if (TCG_TARGET_REG_BITS == 32) { 6321 /* 6322 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6323 * to 64-bits for the helper by storing the low part. Later, 6324 * after we have processed the register inputs, we will load a 6325 * zero for the high part. 6326 */ 6327 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6328 TCG_TYPE_I32, TCG_TYPE_I32, 6329 ldst->addr_reg, -1); 6330 next_arg += 2; 6331 nmov += 1; 6332 } else { 6333 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6334 ldst->addr_reg, -1); 6335 next_arg += n; 6336 nmov += n; 6337 } 6338 6339 /* Handle data argument. 
     */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* 128-bit store data: spill both halves, then pass the address. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

/*
 * Generate host code for the TB whose ops are in @s->ops, writing into
 * tb->tc.  Returns the generated code size in bytes, or a negative value
 * on overflow: -1 when the code buffer high-water mark is exceeded,
 * -2 when the TB size exceeds what gen_insn_end_off can represent or
 * relocations cannot be resolved.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.
             */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* One entry per guest insn, insn_start_words words each. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint here; the empty asm keeps it from being elided. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

/*
 * Return the offset of @str within the string table @strtab.
 * @str must be present: the scan has no failure exit and will run
 * past the table (and loop forever) if the string is missing.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

/*
 * Build an in-memory ELF image describing the code_gen_buffer at
 * @buf_ptr/@buf_size, append the caller-supplied .debug_frame, and
 * register the image with GDB via the JIT interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Minimal DWARF2 .debug_info: one CU containing one subprogram. */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /*
             * NOTE(review): e_ehsize is set from ElfW(Shdr), not
             * ElfW(Ehdr); presumably harmless for GDB's consumer,
             * but verify before relying on this field.
             */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Patch the FDE in the appended .debug_frame to cover the buffer. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /*
             * NOTE(review): fwrite returns the item count (0 or 1 here),
             * so comparing against img_size is not a real error check;
             * the test exists only to consume the return value.
             */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
/* Backends without vector support must never reach vector expansion. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif