/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/*
 * The CIE and FDE header definitions will be common to all hosts.
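 * They describe the unwind information (CIE/FDE records) emitted into the
 * in-memory ELF image that tcg_register_jit_int() registers with GDB.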
*/ 77 typedef struct { 78 uint32_t len __attribute__((aligned((sizeof(void *))))); 79 uint32_t id; 80 uint8_t version; 81 char augmentation[1]; 82 uint8_t code_align; 83 uint8_t data_align; 84 uint8_t return_column; 85 } DebugFrameCIE; 86 87 typedef struct QEMU_PACKED { 88 uint32_t len __attribute__((aligned((sizeof(void *))))); 89 uint32_t cie_offset; 90 uintptr_t func_start; 91 uintptr_t func_len; 92 } DebugFrameFDEHeader; 93 94 typedef struct QEMU_PACKED { 95 DebugFrameCIE cie; 96 DebugFrameFDEHeader fde; 97 } DebugFrameHeader; 98 99 struct TCGLabelQemuLdst { 100 bool is_ld; /* qemu_ld: true, qemu_st: false */ 101 MemOpIdx oi; 102 TCGType type; /* result type of a load */ 103 TCGReg addr_reg; /* reg index for guest virtual addr */ 104 TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ 105 TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ 106 const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ 107 tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ 108 QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; 109 }; 110 111 static void tcg_register_jit_int(const void *buf, size_t size, 112 const void *debug_frame, 113 size_t debug_frame_size) 114 __attribute__((unused)); 115 116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */ 117 static void tcg_out_tb_start(TCGContext *s); 118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 119 intptr_t arg2); 120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 121 static void tcg_out_movi(TCGContext *s, TCGType type, 122 TCGReg ret, tcg_target_long arg); 123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg); 126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg); 127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg); 128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg); 129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); 132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); 133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); 134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); 135 static void tcg_out_goto_tb(TCGContext *s, int which); 136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, 137 const TCGArg args[TCG_MAX_OP_ARGS], 138 const int const_args[TCG_MAX_OP_ARGS]); 139 #if TCG_TARGET_MAYBE_vec 140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 141 TCGReg dst, TCGReg src); 142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 143 TCGReg dst, TCGReg base, intptr_t offset); 144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 145 TCGReg dst, int64_t arg); 146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 147 unsigned vecl, unsigned vece, 148 const TCGArg args[TCG_MAX_OP_ARGS], 149 const int const_args[TCG_MAX_OP_ARGS]); 150 #else 151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 152 TCGReg dst, TCGReg src) 153 { 154 g_assert_not_reached(); 155 } 156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType 
type, unsigned vece, 157 TCGReg dst, TCGReg base, intptr_t offset) 158 { 159 g_assert_not_reached(); 160 } 161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 162 TCGReg dst, int64_t arg) 163 { 164 g_assert_not_reached(); 165 } 166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 167 unsigned vecl, unsigned vece, 168 const TCGArg args[TCG_MAX_OP_ARGS], 169 const int const_args[TCG_MAX_OP_ARGS]) 170 { 171 g_assert_not_reached(); 172 } 173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) 174 { 175 return 0; 176 } 177 #endif 178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 179 intptr_t arg2); 180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 181 TCGReg base, intptr_t ofs); 182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 183 const TCGHelperInfo *info); 184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); 185 static bool tcg_target_const_match(int64_t val, int ct, 186 TCGType type, TCGCond cond, int vece); 187 188 #ifndef CONFIG_USER_ONLY 189 #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) 190 #endif 191 192 typedef struct TCGLdstHelperParam { 193 TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg); 194 unsigned ntmp; 195 int tmp[3]; 196 } TCGLdstHelperParam; 197 198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 199 const TCGLdstHelperParam *p) 200 __attribute__((unused)); 201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l, 202 bool load_sign, const TCGLdstHelperParam *p) 203 __attribute__((unused)); 204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 205 const TCGLdstHelperParam *p) 206 __attribute__((unused)); 207 208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = { 209 [MO_UB] = helper_ldub_mmu, 210 [MO_SB] = helper_ldsb_mmu, 211 [MO_UW] = helper_lduw_mmu, 212 [MO_SW] = helper_ldsw_mmu, 213 [MO_UL] = helper_ldul_mmu, 214 [MO_UQ] = helper_ldq_mmu, 215 #if TCG_TARGET_REG_BITS == 64 216 [MO_SL] = helper_ldsl_mmu, 217 [MO_128] = helper_ld16_mmu, 218 #endif 219 }; 220 221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = { 222 [MO_8] = helper_stb_mmu, 223 [MO_16] = helper_stw_mmu, 224 [MO_32] = helper_stl_mmu, 225 [MO_64] = helper_stq_mmu, 226 #if TCG_TARGET_REG_BITS == 64 227 [MO_128] = helper_st16_mmu, 228 #endif 229 }; 230 231 typedef struct { 232 MemOp atom; /* lg2 bits of atomicity required */ 233 MemOp align; /* lg2 bits of alignment to use */ 234 } TCGAtomAlign; 235 236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 237 MemOp host_atom, bool allow_two_ops) 238 __attribute__((unused)); 239 240 #ifdef CONFIG_USER_ONLY 241 bool tcg_use_softmmu; 242 #endif 243 244 TCGContext tcg_init_ctx; 245 __thread TCGContext *tcg_ctx; 246 247 TCGContext **tcg_ctxs; 248 unsigned int tcg_cur_ctxs; 249 unsigned int tcg_max_ctxs; 250 TCGv_env tcg_env; 251 const void *tcg_code_gen_epilogue; 252 uintptr_t tcg_splitwx_diff; 253 254 #ifndef CONFIG_TCG_INTERPRETER 255 tcg_prologue_fn *tcg_qemu_tb_exec; 256 #endif 257 258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 259 static TCGRegSet tcg_target_call_clobber_regs; 260 261 #if TCG_TARGET_INSN_UNIT_SIZE == 1 262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 263 { 264 *s->code_ptr++ = v; 265 } 266 267 static __attribute__((unused)) inline void 
tcg_patch8(tcg_insn_unit *p, 268 uint8_t v) 269 { 270 *p = v; 271 } 272 #endif 273 274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 276 { 277 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 278 *s->code_ptr++ = v; 279 } else { 280 tcg_insn_unit *p = s->code_ptr; 281 memcpy(p, &v, sizeof(v)); 282 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 283 } 284 } 285 286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 287 uint16_t v) 288 { 289 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 290 *p = v; 291 } else { 292 memcpy(p, &v, sizeof(v)); 293 } 294 } 295 #endif 296 297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 299 { 300 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 301 *s->code_ptr++ = v; 302 } else { 303 tcg_insn_unit *p = s->code_ptr; 304 memcpy(p, &v, sizeof(v)); 305 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 306 } 307 } 308 309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 310 uint32_t v) 311 { 312 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 313 *p = v; 314 } else { 315 memcpy(p, &v, sizeof(v)); 316 } 317 } 318 #endif 319 320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 322 { 323 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 324 *s->code_ptr++ = v; 325 } else { 326 tcg_insn_unit *p = s->code_ptr; 327 memcpy(p, &v, sizeof(v)); 328 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 329 } 330 } 331 332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 333 uint64_t v) 334 { 335 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 336 *p = v; 337 } else { 338 memcpy(p, &v, sizeof(v)); 339 } 340 } 341 #endif 342 343 /* label relocation processing */ 344 345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 346 TCGLabel *l, intptr_t addend) 347 { 348 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 349 350 r->type = type; 351 r->ptr = code_ptr; 352 r->addend = addend; 353 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 354 } 355 356 static void tcg_out_label(TCGContext *s, TCGLabel *l) 357 { 358 tcg_debug_assert(!l->has_value); 359 l->has_value = 1; 360 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 361 } 362 363 TCGLabel *gen_new_label(void) 364 { 365 TCGContext *s = tcg_ctx; 366 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 367 368 memset(l, 0, sizeof(TCGLabel)); 369 l->id = s->nb_labels++; 370 QSIMPLEQ_INIT(&l->branches); 371 QSIMPLEQ_INIT(&l->relocs); 372 373 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 374 375 return l; 376 } 377 378 static bool tcg_resolve_relocs(TCGContext *s) 379 { 380 TCGLabel *l; 381 382 QSIMPLEQ_FOREACH(l, &s->labels, next) { 383 TCGRelocation *r; 384 uintptr_t value = l->u.value; 385 386 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 387 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 388 return false; 389 } 390 } 391 } 392 return true; 393 } 394 395 static void set_jmp_reset_offset(TCGContext *s, int which) 396 { 397 /* 398 * We will check for overflow at the end of the opcode loop in 399 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 400 */ 401 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); 402 } 403 404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) 405 { 406 /* 407 * We will check for overflow at the end of the opcode loop in 408 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Minor variations on a theme, using a structure.
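 * tcg_out_movext1_new_src() replays the move described by @i but reads from
 * an alternate source register; tcg_out_movext1() is the common case of
 * reading from i->src.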
 */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend.
*/ 628 tcg_out_movext1_new_src(s, i1, i1->dst); 629 tcg_out_movext1_new_src(s, i2, i2->dst); 630 tcg_out_movext1_new_src(s, i3, i3->dst); 631 } else { 632 tcg_debug_assert(scratch >= 0); 633 tcg_out_mov(s, i1->src_type, scratch, src1); 634 tcg_out_movext1(s, i2); 635 tcg_out_movext1(s, i3); 636 tcg_out_movext1_new_src(s, i1, scratch); 637 } 638 } else { 639 g_assert_not_reached(); 640 } 641 } 642 643 /* 644 * Allocate a new TCGLabelQemuLdst entry. 645 */ 646 647 __attribute__((unused)) 648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) 649 { 650 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); 651 652 memset(l, 0, sizeof(*l)); 653 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); 654 655 return l; 656 } 657 658 /* 659 * Allocate new constant pool entries. 660 */ 661 662 typedef struct TCGLabelPoolData { 663 struct TCGLabelPoolData *next; 664 tcg_insn_unit *label; 665 intptr_t addend; 666 int rtype; 667 unsigned nlong; 668 tcg_target_ulong data[]; 669 } TCGLabelPoolData; 670 671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, 672 tcg_insn_unit *label, intptr_t addend) 673 { 674 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) 675 + sizeof(tcg_target_ulong) * nlong); 676 677 n->label = label; 678 n->addend = addend; 679 n->rtype = rtype; 680 n->nlong = nlong; 681 return n; 682 } 683 684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) 685 { 686 TCGLabelPoolData *i, **pp; 687 int nlong = n->nlong; 688 689 /* Insertion sort on the pool. */ 690 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { 691 if (nlong > i->nlong) { 692 break; 693 } 694 if (nlong < i->nlong) { 695 continue; 696 } 697 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { 698 break; 699 } 700 } 701 n->next = *pp; 702 *pp = n; 703 } 704 705 /* The "usual" for generic integer code. */ 706 __attribute__((unused)) 707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, 708 tcg_insn_unit *label, intptr_t addend) 709 { 710 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); 711 n->data[0] = d; 712 new_pool_insert(s, n); 713 } 714 715 /* For v64 or v128, depending on the host. */ 716 __attribute__((unused)) 717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, 718 intptr_t addend, tcg_target_ulong d0, 719 tcg_target_ulong d1) 720 { 721 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); 722 n->data[0] = d0; 723 n->data[1] = d1; 724 new_pool_insert(s, n); 725 } 726 727 /* For v128 or v256, depending on the host. */ 728 __attribute__((unused)) 729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, 730 intptr_t addend, tcg_target_ulong d0, 731 tcg_target_ulong d1, tcg_target_ulong d2, 732 tcg_target_ulong d3) 733 { 734 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); 735 n->data[0] = d0; 736 n->data[1] = d1; 737 n->data[2] = d2; 738 n->data[3] = d3; 739 new_pool_insert(s, n); 740 } 741 742 /* For v256, for 32-bit host. 
*/ 743 __attribute__((unused)) 744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, 745 intptr_t addend, tcg_target_ulong d0, 746 tcg_target_ulong d1, tcg_target_ulong d2, 747 tcg_target_ulong d3, tcg_target_ulong d4, 748 tcg_target_ulong d5, tcg_target_ulong d6, 749 tcg_target_ulong d7) 750 { 751 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); 752 n->data[0] = d0; 753 n->data[1] = d1; 754 n->data[2] = d2; 755 n->data[3] = d3; 756 n->data[4] = d4; 757 n->data[5] = d5; 758 n->data[6] = d6; 759 n->data[7] = d7; 760 new_pool_insert(s, n); 761 } 762 763 /* 764 * Generate TB finalization at the end of block 765 */ 766 767 static int tcg_out_ldst_finalize(TCGContext *s) 768 { 769 TCGLabelQemuLdst *lb; 770 771 /* qemu_ld/st slow paths */ 772 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 773 if (lb->is_ld 774 ? !tcg_out_qemu_ld_slow_path(s, lb) 775 : !tcg_out_qemu_st_slow_path(s, lb)) { 776 return -2; 777 } 778 779 /* 780 * Test for (pending) buffer overflow. The assumption is that any 781 * one operation beginning below the high water mark cannot overrun 782 * the buffer completely. Thus we can test for overflow after 783 * generating code without having to check during generation. 784 */ 785 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 786 return -1; 787 } 788 } 789 return 0; 790 } 791 792 static int tcg_out_pool_finalize(TCGContext *s) 793 { 794 TCGLabelPoolData *p = s->pool_labels; 795 TCGLabelPoolData *l = NULL; 796 void *a; 797 798 if (p == NULL) { 799 return 0; 800 } 801 802 /* 803 * ??? Round up to qemu_icache_linesize, but then do not round 804 * again when allocating the next TranslationBlock structure. 805 */ 806 a = (void *)ROUND_UP((uintptr_t)s->code_ptr, 807 sizeof(tcg_target_ulong) * p->nlong); 808 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); 809 s->data_gen_ptr = a; 810 811 for (; p != NULL; p = p->next) { 812 size_t size = sizeof(tcg_target_ulong) * p->nlong; 813 uintptr_t value; 814 815 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 816 if (unlikely(a > s->code_gen_highwater)) { 817 return -1; 818 } 819 memcpy(a, p->data, size); 820 a += size; 821 l = p; 822 } 823 824 value = (uintptr_t)tcg_splitwx_to_rx(a) - size; 825 if (!patch_reloc(p->label, p->rtype, value, p->addend)) { 826 return -2; 827 } 828 } 829 830 s->code_ptr = a; 831 return 0; 832 } 833 834 #define C_PFX1(P, A) P##A 835 #define C_PFX2(P, A, B) P##A##_##B 836 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 837 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 838 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 839 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 840 841 /* Define an enumeration for the various combinations. 
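 * For example, a backend constraint written as C_O1_I2(r, r, ri) in its
 * tcg-target-con-set.h expands here (via C_PFX3) to the enumerator
 * c_o1_i2_r_r_ri.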
*/ 842 843 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1), 844 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2), 845 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3), 846 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4), 847 848 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1), 849 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2), 850 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), 851 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), 852 853 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), 854 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1), 855 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), 856 857 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), 858 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), 859 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), 860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), 861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), 862 863 typedef enum { 864 C_Dynamic = -2, 865 C_NotImplemented = -1, 866 #include "tcg-target-con-set.h" 867 } TCGConstraintSetIndex; 868 869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned); 870 871 #undef C_O0_I1 872 #undef C_O0_I2 873 #undef C_O0_I3 874 #undef C_O0_I4 875 #undef C_O1_I1 876 #undef C_O1_I2 877 #undef C_O1_I3 878 #undef C_O1_I4 879 #undef C_N1_I2 880 #undef C_N1O1_I1 881 #undef C_N2_I1 882 #undef C_O2_I1 883 #undef C_O2_I2 884 #undef C_O2_I3 885 #undef C_O2_I4 886 #undef C_N1_O1_I4 887 888 /* Put all of the constraint sets into an array, indexed by the enum. */ 889 890 typedef struct TCGConstraintSet { 891 uint8_t nb_oargs, nb_iargs; 892 const char *args_ct_str[TCG_MAX_OP_ARGS]; 893 } TCGConstraintSet; 894 895 #define C_O0_I1(I1) { 0, 1, { #I1 } }, 896 #define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } }, 897 #define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } }, 898 #define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } }, 899 900 #define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } }, 901 #define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } }, 902 #define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } }, 903 #define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } }, 904 905 #define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } }, 906 #define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } }, 907 #define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } }, 908 909 #define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } }, 910 #define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } }, 911 #define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } }, 912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } }, 913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, 914 915 static const TCGConstraintSet constraint_sets[] = { 916 #include "tcg-target-con-set.h" 917 }; 918 919 #undef C_O0_I1 920 #undef C_O0_I2 921 #undef C_O0_I3 922 #undef C_O0_I4 923 #undef C_O1_I1 924 #undef C_O1_I2 925 #undef C_O1_I3 926 #undef C_O1_I4 927 #undef C_N1_I2 928 #undef C_N1O1_I1 929 #undef C_N2_I1 930 #undef C_O2_I1 931 #undef C_O2_I2 932 #undef C_O2_I3 933 #undef C_O2_I4 934 #undef C_N1_O1_I4 935 936 /* Expand the enumerator to be returned from tcg_target_op_def(). 
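 * This third expansion of the same C_* macros (now without a trailing comma)
 * lets a backend's tcg_target_op_def() write, e.g., "return C_O1_I2(r, r, ri);"
 * and have it evaluate to the enumerator defined above, which in turn
 * indexes constraint_sets[].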
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/*
 * Register allocation descriptions for every TCGOpcode.
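 * The OUTOP() entries use _Generic, so that, for example,
 *   OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 * only compiles to [INDEX_op_add] = &outop_add.base if the backend's
 * outop_add really has type TCGOutOpBinary.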
 */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx.  See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[].  Note that
 * in system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial
 * context is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.
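     * (Each new context receives a copy of the parent's global temps; any
     * temp whose mem_base pointed into tcg_init_ctx.temps[] must be
     * redirected to the corresponding entry in this context's own temps[].)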
*/ 1069 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 1070 if (tcg_init_ctx.temps[i].mem_base) { 1071 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 1072 tcg_debug_assert(b >= 0 && b < n); 1073 s->temps[i].mem_base = &s->temps[b]; 1074 } 1075 } 1076 1077 /* Claim an entry in tcg_ctxs */ 1078 n = qatomic_fetch_inc(&tcg_cur_ctxs); 1079 g_assert(n < tcg_max_ctxs); 1080 qatomic_set(&tcg_ctxs[n], s); 1081 1082 if (n > 0) { 1083 tcg_region_initial_alloc(s); 1084 } 1085 1086 tcg_ctx = s; 1087 } 1088 #endif /* !CONFIG_USER_ONLY */ 1089 1090 /* pool based memory allocation */ 1091 void *tcg_malloc_internal(TCGContext *s, int size) 1092 { 1093 TCGPool *p; 1094 int pool_size; 1095 1096 if (size > TCG_POOL_CHUNK_SIZE) { 1097 /* big malloc: insert a new pool (XXX: could optimize) */ 1098 p = g_malloc(sizeof(TCGPool) + size); 1099 p->size = size; 1100 p->next = s->pool_first_large; 1101 s->pool_first_large = p; 1102 return p->data; 1103 } else { 1104 p = s->pool_current; 1105 if (!p) { 1106 p = s->pool_first; 1107 if (!p) 1108 goto new_pool; 1109 } else { 1110 if (!p->next) { 1111 new_pool: 1112 pool_size = TCG_POOL_CHUNK_SIZE; 1113 p = g_malloc(sizeof(TCGPool) + pool_size); 1114 p->size = pool_size; 1115 p->next = NULL; 1116 if (s->pool_current) { 1117 s->pool_current->next = p; 1118 } else { 1119 s->pool_first = p; 1120 } 1121 } else { 1122 p = p->next; 1123 } 1124 } 1125 } 1126 s->pool_current = p; 1127 s->pool_cur = p->data + size; 1128 s->pool_end = p->data + p->size; 1129 return p->data; 1130 } 1131 1132 void tcg_pool_reset(TCGContext *s) 1133 { 1134 TCGPool *p, *t; 1135 for (p = s->pool_first_large; p; p = t) { 1136 t = p->next; 1137 g_free(p); 1138 } 1139 s->pool_first_large = NULL; 1140 s->pool_cur = s->pool_end = NULL; 1141 s->pool_current = NULL; 1142 } 1143 1144 /* 1145 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions, 1146 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N. 1147 * We only use these for layout in tcg_out_ld_helper_ret and 1148 * tcg_out_st_helper_args, and share them between several of 1149 * the helpers, with the end result that it's easier to build manually. 
1150 */ 1151 1152 #if TCG_TARGET_REG_BITS == 32 1153 # define dh_typecode_ttl dh_typecode_i32 1154 #else 1155 # define dh_typecode_ttl dh_typecode_i64 1156 #endif 1157 1158 static TCGHelperInfo info_helper_ld32_mmu = { 1159 .flags = TCG_CALL_NO_WG, 1160 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1161 | dh_typemask(env, 1) 1162 | dh_typemask(i64, 2) /* uint64_t addr */ 1163 | dh_typemask(i32, 3) /* unsigned oi */ 1164 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1165 }; 1166 1167 static TCGHelperInfo info_helper_ld64_mmu = { 1168 .flags = TCG_CALL_NO_WG, 1169 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1170 | dh_typemask(env, 1) 1171 | dh_typemask(i64, 2) /* uint64_t addr */ 1172 | dh_typemask(i32, 3) /* unsigned oi */ 1173 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1174 }; 1175 1176 static TCGHelperInfo info_helper_ld128_mmu = { 1177 .flags = TCG_CALL_NO_WG, 1178 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1179 | dh_typemask(env, 1) 1180 | dh_typemask(i64, 2) /* uint64_t addr */ 1181 | dh_typemask(i32, 3) /* unsigned oi */ 1182 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1183 }; 1184 1185 static TCGHelperInfo info_helper_st32_mmu = { 1186 .flags = TCG_CALL_NO_WG, 1187 .typemask = dh_typemask(void, 0) 1188 | dh_typemask(env, 1) 1189 | dh_typemask(i64, 2) /* uint64_t addr */ 1190 | dh_typemask(i32, 3) /* uint32_t data */ 1191 | dh_typemask(i32, 4) /* unsigned oi */ 1192 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1193 }; 1194 1195 static TCGHelperInfo info_helper_st64_mmu = { 1196 .flags = TCG_CALL_NO_WG, 1197 .typemask = dh_typemask(void, 0) 1198 | dh_typemask(env, 1) 1199 | dh_typemask(i64, 2) /* uint64_t addr */ 1200 | dh_typemask(i64, 3) /* uint64_t data */ 1201 | dh_typemask(i32, 4) /* unsigned oi */ 1202 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1203 }; 1204 1205 static TCGHelperInfo info_helper_st128_mmu = { 1206 .flags = TCG_CALL_NO_WG, 1207 .typemask = dh_typemask(void, 0) 1208 | dh_typemask(env, 1) 1209 | dh_typemask(i64, 2) /* uint64_t addr */ 1210 | dh_typemask(i128, 3) /* Int128 data */ 1211 | dh_typemask(i32, 4) /* unsigned oi */ 1212 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1213 }; 1214 1215 #ifdef CONFIG_TCG_INTERPRETER 1216 static ffi_type *typecode_to_ffi(int argmask) 1217 { 1218 /* 1219 * libffi does not support __int128_t, so we have forced Int128 1220 * to use the structure definition instead of the builtin type. 1221 */ 1222 static ffi_type *ffi_type_i128_elements[3] = { 1223 &ffi_type_uint64, 1224 &ffi_type_uint64, 1225 NULL 1226 }; 1227 static ffi_type ffi_type_i128 = { 1228 .size = 16, 1229 .alignment = __alignof__(Int128), 1230 .type = FFI_TYPE_STRUCT, 1231 .elements = ffi_type_i128_elements, 1232 }; 1233 1234 switch (argmask) { 1235 case dh_typecode_void: 1236 return &ffi_type_void; 1237 case dh_typecode_i32: 1238 return &ffi_type_uint32; 1239 case dh_typecode_s32: 1240 return &ffi_type_sint32; 1241 case dh_typecode_i64: 1242 return &ffi_type_uint64; 1243 case dh_typecode_s64: 1244 return &ffi_type_sint64; 1245 case dh_typecode_ptr: 1246 return &ffi_type_pointer; 1247 case dh_typecode_i128: 1248 return &ffi_type_i128; 1249 } 1250 g_assert_not_reached(); 1251 } 1252 1253 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1254 { 1255 unsigned typemask = info->typemask; 1256 struct { 1257 ffi_cif cif; 1258 ffi_type *args[]; 1259 } *ca; 1260 ffi_status status; 1261 int nargs; 1262 1263 /* Ignoring the return type, find the last non-zero field. 
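     * The typemask packs one 3-bit typecode per argument above the 3-bit
     * return type, so the index of its most significant set bit, divided
     * by 3 and rounded up, gives the argument count.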
*/ 1264 nargs = 32 - clz32(typemask >> 3); 1265 nargs = DIV_ROUND_UP(nargs, 3); 1266 assert(nargs <= MAX_CALL_IARGS); 1267 1268 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 1269 ca->cif.rtype = typecode_to_ffi(typemask & 7); 1270 ca->cif.nargs = nargs; 1271 1272 if (nargs != 0) { 1273 ca->cif.arg_types = ca->args; 1274 for (int j = 0; j < nargs; ++j) { 1275 int typecode = extract32(typemask, (j + 1) * 3, 3); 1276 ca->args[j] = typecode_to_ffi(typecode); 1277 } 1278 } 1279 1280 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 1281 ca->cif.rtype, ca->cif.arg_types); 1282 assert(status == FFI_OK); 1283 1284 return &ca->cif; 1285 } 1286 1287 #define HELPER_INFO_INIT(I) (&(I)->cif) 1288 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I) 1289 #else 1290 #define HELPER_INFO_INIT(I) (&(I)->init) 1291 #define HELPER_INFO_INIT_VAL(I) 1 1292 #endif /* CONFIG_TCG_INTERPRETER */ 1293 1294 static inline bool arg_slot_reg_p(unsigned arg_slot) 1295 { 1296 /* 1297 * Split the sizeof away from the comparison to avoid Werror from 1298 * "unsigned < 0 is always false", when iarg_regs is empty. 1299 */ 1300 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs); 1301 return arg_slot < nreg; 1302 } 1303 1304 static inline int arg_slot_stk_ofs(unsigned arg_slot) 1305 { 1306 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1307 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 1308 1309 tcg_debug_assert(stk_slot < max); 1310 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long); 1311 } 1312 1313 typedef struct TCGCumulativeArgs { 1314 int arg_idx; /* tcg_gen_callN args[] */ 1315 int info_in_idx; /* TCGHelperInfo in[] */ 1316 int arg_slot; /* regs+stack slot */ 1317 int ref_slot; /* stack slots for references */ 1318 } TCGCumulativeArgs; 1319 1320 static void layout_arg_even(TCGCumulativeArgs *cum) 1321 { 1322 cum->arg_slot += cum->arg_slot & 1; 1323 } 1324 1325 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 1326 TCGCallArgumentKind kind) 1327 { 1328 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1329 1330 *loc = (TCGCallArgumentLoc){ 1331 .kind = kind, 1332 .arg_idx = cum->arg_idx, 1333 .arg_slot = cum->arg_slot, 1334 }; 1335 cum->info_in_idx++; 1336 cum->arg_slot++; 1337 } 1338 1339 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 1340 TCGHelperInfo *info, int n) 1341 { 1342 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1343 1344 for (int i = 0; i < n; ++i) { 1345 /* Layout all using the same arg_idx, adjusting the subindex. */ 1346 loc[i] = (TCGCallArgumentLoc){ 1347 .kind = TCG_CALL_ARG_NORMAL, 1348 .arg_idx = cum->arg_idx, 1349 .tmp_subindex = i, 1350 .arg_slot = cum->arg_slot + i, 1351 }; 1352 } 1353 cum->info_in_idx += n; 1354 cum->arg_slot += n; 1355 } 1356 1357 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 1358 { 1359 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1360 int n = 128 / TCG_TARGET_REG_BITS; 1361 1362 /* The first subindex carries the pointer. */ 1363 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 1364 1365 /* 1366 * The callee is allowed to clobber memory associated with 1367 * structure pass by-reference. Therefore we must make copies. 1368 * Allocate space from "ref_slot", which will be adjusted to 1369 * follow the parameters on the stack. 1370 */ 1371 loc[0].ref_slot = cum->ref_slot; 1372 1373 /* 1374 * Subsequent words also go into the reference slot, but 1375 * do not accumulate into the regular arguments. 
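     * (For a 64-bit host n is 2: the first word is the by-reference pointer
     * and consumes an ordinary argument slot, while the second merely
     * reserves the second word of the stack copy via ref_slot.)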
1376 */ 1377 for (int i = 1; i < n; ++i) { 1378 loc[i] = (TCGCallArgumentLoc){ 1379 .kind = TCG_CALL_ARG_BY_REF_N, 1380 .arg_idx = cum->arg_idx, 1381 .tmp_subindex = i, 1382 .ref_slot = cum->ref_slot + i, 1383 }; 1384 } 1385 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1386 cum->ref_slot += n; 1387 } 1388 1389 static void init_call_layout(TCGHelperInfo *info) 1390 { 1391 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1392 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1393 unsigned typemask = info->typemask; 1394 unsigned typecode; 1395 TCGCumulativeArgs cum = { }; 1396 1397 /* 1398 * Parse and place any function return value. 1399 */ 1400 typecode = typemask & 7; 1401 switch (typecode) { 1402 case dh_typecode_void: 1403 info->nr_out = 0; 1404 break; 1405 case dh_typecode_i32: 1406 case dh_typecode_s32: 1407 case dh_typecode_ptr: 1408 info->nr_out = 1; 1409 info->out_kind = TCG_CALL_RET_NORMAL; 1410 break; 1411 case dh_typecode_i64: 1412 case dh_typecode_s64: 1413 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1414 info->out_kind = TCG_CALL_RET_NORMAL; 1415 /* Query the last register now to trigger any assert early. */ 1416 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1417 break; 1418 case dh_typecode_i128: 1419 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1420 info->out_kind = TCG_TARGET_CALL_RET_I128; 1421 switch (TCG_TARGET_CALL_RET_I128) { 1422 case TCG_CALL_RET_NORMAL: 1423 /* Query the last register now to trigger any assert early. */ 1424 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1425 break; 1426 case TCG_CALL_RET_BY_VEC: 1427 /* Query the single register now to trigger any assert early. */ 1428 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1429 break; 1430 case TCG_CALL_RET_BY_REF: 1431 /* 1432 * Allocate the first argument to the output. 1433 * We don't need to store this anywhere, just make it 1434 * unavailable for use in the input loop below. 1435 */ 1436 cum.arg_slot = 1; 1437 break; 1438 default: 1439 qemu_build_not_reached(); 1440 } 1441 break; 1442 default: 1443 g_assert_not_reached(); 1444 } 1445 1446 /* 1447 * Parse and place function arguments. 
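     * Each remaining 3-bit typecode selects a TCGType; the nested switch
     * below then places it according to the backend's TCG_TARGET_CALL_ARG_*
     * convention, e.g. an I64 argument on a 32-bit host occupies two
     * consecutive slots, aligned to an even slot first if the ABI asks for it.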
1448 */ 1449 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1450 TCGCallArgumentKind kind; 1451 TCGType type; 1452 1453 typecode = typemask & 7; 1454 switch (typecode) { 1455 case dh_typecode_i32: 1456 case dh_typecode_s32: 1457 type = TCG_TYPE_I32; 1458 break; 1459 case dh_typecode_i64: 1460 case dh_typecode_s64: 1461 type = TCG_TYPE_I64; 1462 break; 1463 case dh_typecode_ptr: 1464 type = TCG_TYPE_PTR; 1465 break; 1466 case dh_typecode_i128: 1467 type = TCG_TYPE_I128; 1468 break; 1469 default: 1470 g_assert_not_reached(); 1471 } 1472 1473 switch (type) { 1474 case TCG_TYPE_I32: 1475 switch (TCG_TARGET_CALL_ARG_I32) { 1476 case TCG_CALL_ARG_EVEN: 1477 layout_arg_even(&cum); 1478 /* fall through */ 1479 case TCG_CALL_ARG_NORMAL: 1480 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1481 break; 1482 case TCG_CALL_ARG_EXTEND: 1483 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1484 layout_arg_1(&cum, info, kind); 1485 break; 1486 default: 1487 qemu_build_not_reached(); 1488 } 1489 break; 1490 1491 case TCG_TYPE_I64: 1492 switch (TCG_TARGET_CALL_ARG_I64) { 1493 case TCG_CALL_ARG_EVEN: 1494 layout_arg_even(&cum); 1495 /* fall through */ 1496 case TCG_CALL_ARG_NORMAL: 1497 if (TCG_TARGET_REG_BITS == 32) { 1498 layout_arg_normal_n(&cum, info, 2); 1499 } else { 1500 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1501 } 1502 break; 1503 default: 1504 qemu_build_not_reached(); 1505 } 1506 break; 1507 1508 case TCG_TYPE_I128: 1509 switch (TCG_TARGET_CALL_ARG_I128) { 1510 case TCG_CALL_ARG_EVEN: 1511 layout_arg_even(&cum); 1512 /* fall through */ 1513 case TCG_CALL_ARG_NORMAL: 1514 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1515 break; 1516 case TCG_CALL_ARG_BY_REF: 1517 layout_arg_by_ref(&cum, info); 1518 break; 1519 default: 1520 qemu_build_not_reached(); 1521 } 1522 break; 1523 1524 default: 1525 g_assert_not_reached(); 1526 } 1527 } 1528 info->nr_in = cum.info_in_idx; 1529 1530 /* Validate that we didn't overrun the input array. */ 1531 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1532 /* Validate the backend has enough argument space. */ 1533 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1534 1535 /* 1536 * Relocate the "ref_slot" area to the end of the parameters. 1537 * Minimizing this stack offset helps code size for x86, 1538 * which has a signed 8-bit offset encoding. 
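     * The copies land just past the stack argument slots actually in use,
     * rounded up to Int128 alignment, or directly past the register-argument
     * area when no stack argument slots are needed.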
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region.  See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
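 * (Both the TB and the code pointer that follows it are rounded up to
 * qemu_icache_linesize in tcg_tb_alloc() below.)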
1639 */ 1640 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1641 { 1642 uintptr_t align = qemu_icache_linesize; 1643 TranslationBlock *tb; 1644 void *next; 1645 1646 retry: 1647 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1648 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1649 1650 if (unlikely(next > s->code_gen_highwater)) { 1651 if (tcg_region_alloc(s)) { 1652 return NULL; 1653 } 1654 goto retry; 1655 } 1656 qatomic_set(&s->code_gen_ptr, next); 1657 return tb; 1658 } 1659 1660 void tcg_prologue_init(void) 1661 { 1662 TCGContext *s = tcg_ctx; 1663 size_t prologue_size; 1664 1665 s->code_ptr = s->code_gen_ptr; 1666 s->code_buf = s->code_gen_ptr; 1667 s->data_gen_ptr = NULL; 1668 1669 #ifndef CONFIG_TCG_INTERPRETER 1670 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1671 #endif 1672 1673 s->pool_labels = NULL; 1674 1675 qemu_thread_jit_write(); 1676 /* Generate the prologue. */ 1677 tcg_target_qemu_prologue(s); 1678 1679 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1680 { 1681 int result = tcg_out_pool_finalize(s); 1682 tcg_debug_assert(result == 0); 1683 } 1684 1685 prologue_size = tcg_current_code_size(s); 1686 perf_report_prologue(s->code_gen_ptr, prologue_size); 1687 1688 #ifndef CONFIG_TCG_INTERPRETER 1689 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1690 (uintptr_t)s->code_buf, prologue_size); 1691 #endif 1692 1693 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1694 FILE *logfile = qemu_log_trylock(); 1695 if (logfile) { 1696 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1697 if (s->data_gen_ptr) { 1698 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1699 size_t data_size = prologue_size - code_size; 1700 size_t i; 1701 1702 disas(logfile, s->code_gen_ptr, code_size); 1703 1704 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1705 if (sizeof(tcg_target_ulong) == 8) { 1706 fprintf(logfile, 1707 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1708 (uintptr_t)s->data_gen_ptr + i, 1709 *(uint64_t *)(s->data_gen_ptr + i)); 1710 } else { 1711 fprintf(logfile, 1712 "0x%08" PRIxPTR ": .long 0x%08x\n", 1713 (uintptr_t)s->data_gen_ptr + i, 1714 *(uint32_t *)(s->data_gen_ptr + i)); 1715 } 1716 } 1717 } else { 1718 disas(logfile, s->code_gen_ptr, prologue_size); 1719 } 1720 fprintf(logfile, "\n"); 1721 qemu_log_unlock(logfile); 1722 } 1723 } 1724 1725 #ifndef CONFIG_TCG_INTERPRETER 1726 /* 1727 * Assert that goto_ptr is implemented completely, setting an epilogue. 1728 * For tci, we use NULL as the signal to return from the interpreter, 1729 * so skip this check. 1730 */ 1731 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1732 #endif 1733 1734 tcg_region_prologue_set(s); 1735 } 1736 1737 void tcg_func_start(TCGContext *s) 1738 { 1739 tcg_pool_reset(s); 1740 s->nb_temps = s->nb_globals; 1741 1742 /* No temps have been previously allocated for size or locality. */ 1743 tcg_temp_ebb_reset_freed(s); 1744 1745 /* No constant temps have been previously allocated. 
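     * (nb_temps was reset above, so any TCGTemp still referenced from the
     * const_table hash tables is stale; drop all cached entries.)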
*/ 1746 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1747 if (s->const_table[i]) { 1748 g_hash_table_remove_all(s->const_table[i]); 1749 } 1750 } 1751 1752 s->nb_ops = 0; 1753 s->nb_labels = 0; 1754 s->current_frame_offset = s->frame_start; 1755 1756 #ifdef CONFIG_DEBUG_TCG 1757 s->goto_tb_issue_mask = 0; 1758 #endif 1759 1760 QTAILQ_INIT(&s->ops); 1761 QTAILQ_INIT(&s->free_ops); 1762 s->emit_before_op = NULL; 1763 QSIMPLEQ_INIT(&s->labels); 1764 1765 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1766 tcg_debug_assert(s->insn_start_words > 0); 1767 } 1768 1769 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1770 { 1771 int n = s->nb_temps++; 1772 1773 if (n >= TCG_MAX_TEMPS) { 1774 tcg_raise_tb_overflow(s); 1775 } 1776 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1777 } 1778 1779 static TCGTemp *tcg_global_alloc(TCGContext *s) 1780 { 1781 TCGTemp *ts; 1782 1783 tcg_debug_assert(s->nb_globals == s->nb_temps); 1784 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1785 s->nb_globals++; 1786 ts = tcg_temp_alloc(s); 1787 ts->kind = TEMP_GLOBAL; 1788 1789 return ts; 1790 } 1791 1792 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1793 TCGReg reg, const char *name) 1794 { 1795 TCGTemp *ts; 1796 1797 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1798 1799 ts = tcg_global_alloc(s); 1800 ts->base_type = type; 1801 ts->type = type; 1802 ts->kind = TEMP_FIXED; 1803 ts->reg = reg; 1804 ts->name = name; 1805 tcg_regset_set_reg(s->reserved_regs, reg); 1806 1807 return ts; 1808 } 1809 1810 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1811 { 1812 s->frame_start = start; 1813 s->frame_end = start + size; 1814 s->frame_temp 1815 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1816 } 1817 1818 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1819 const char *name, TCGType type) 1820 { 1821 TCGContext *s = tcg_ctx; 1822 TCGTemp *base_ts = tcgv_ptr_temp(base); 1823 TCGTemp *ts = tcg_global_alloc(s); 1824 int indirect_reg = 0; 1825 1826 switch (base_ts->kind) { 1827 case TEMP_FIXED: 1828 break; 1829 case TEMP_GLOBAL: 1830 /* We do not support double-indirect registers. */ 1831 tcg_debug_assert(!base_ts->indirect_reg); 1832 base_ts->indirect_base = 1; 1833 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1834 ? 
2 : 1); 1835 indirect_reg = 1; 1836 break; 1837 default: 1838 g_assert_not_reached(); 1839 } 1840 1841 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1842 TCGTemp *ts2 = tcg_global_alloc(s); 1843 char buf[64]; 1844 1845 ts->base_type = TCG_TYPE_I64; 1846 ts->type = TCG_TYPE_I32; 1847 ts->indirect_reg = indirect_reg; 1848 ts->mem_allocated = 1; 1849 ts->mem_base = base_ts; 1850 ts->mem_offset = offset; 1851 pstrcpy(buf, sizeof(buf), name); 1852 pstrcat(buf, sizeof(buf), "_0"); 1853 ts->name = strdup(buf); 1854 1855 tcg_debug_assert(ts2 == ts + 1); 1856 ts2->base_type = TCG_TYPE_I64; 1857 ts2->type = TCG_TYPE_I32; 1858 ts2->indirect_reg = indirect_reg; 1859 ts2->mem_allocated = 1; 1860 ts2->mem_base = base_ts; 1861 ts2->mem_offset = offset + 4; 1862 ts2->temp_subindex = 1; 1863 pstrcpy(buf, sizeof(buf), name); 1864 pstrcat(buf, sizeof(buf), "_1"); 1865 ts2->name = strdup(buf); 1866 } else { 1867 ts->base_type = type; 1868 ts->type = type; 1869 ts->indirect_reg = indirect_reg; 1870 ts->mem_allocated = 1; 1871 ts->mem_base = base_ts; 1872 ts->mem_offset = offset; 1873 ts->name = name; 1874 } 1875 return ts; 1876 } 1877 1878 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1879 { 1880 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1881 return temp_tcgv_i32(ts); 1882 } 1883 1884 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1885 { 1886 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1887 return temp_tcgv_i64(ts); 1888 } 1889 1890 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1891 { 1892 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1893 return temp_tcgv_ptr(ts); 1894 } 1895 1896 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1897 { 1898 TCGContext *s = tcg_ctx; 1899 TCGTemp *ts; 1900 int n; 1901 1902 if (kind == TEMP_EBB) { 1903 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1904 1905 if (idx < TCG_MAX_TEMPS) { 1906 /* There is already an available temp with the right type. 
*/ 1907 clear_bit(idx, s->free_temps[type].l); 1908 1909 ts = &s->temps[idx]; 1910 ts->temp_allocated = 1; 1911 tcg_debug_assert(ts->base_type == type); 1912 tcg_debug_assert(ts->kind == kind); 1913 return ts; 1914 } 1915 } else { 1916 tcg_debug_assert(kind == TEMP_TB); 1917 } 1918 1919 switch (type) { 1920 case TCG_TYPE_I32: 1921 case TCG_TYPE_V64: 1922 case TCG_TYPE_V128: 1923 case TCG_TYPE_V256: 1924 n = 1; 1925 break; 1926 case TCG_TYPE_I64: 1927 n = 64 / TCG_TARGET_REG_BITS; 1928 break; 1929 case TCG_TYPE_I128: 1930 n = 128 / TCG_TARGET_REG_BITS; 1931 break; 1932 default: 1933 g_assert_not_reached(); 1934 } 1935 1936 ts = tcg_temp_alloc(s); 1937 ts->base_type = type; 1938 ts->temp_allocated = 1; 1939 ts->kind = kind; 1940 1941 if (n == 1) { 1942 ts->type = type; 1943 } else { 1944 ts->type = TCG_TYPE_REG; 1945 1946 for (int i = 1; i < n; ++i) { 1947 TCGTemp *ts2 = tcg_temp_alloc(s); 1948 1949 tcg_debug_assert(ts2 == ts + i); 1950 ts2->base_type = type; 1951 ts2->type = TCG_TYPE_REG; 1952 ts2->temp_allocated = 1; 1953 ts2->temp_subindex = i; 1954 ts2->kind = kind; 1955 } 1956 } 1957 return ts; 1958 } 1959 1960 TCGv_i32 tcg_temp_new_i32(void) 1961 { 1962 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1963 } 1964 1965 TCGv_i32 tcg_temp_ebb_new_i32(void) 1966 { 1967 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1968 } 1969 1970 TCGv_i64 tcg_temp_new_i64(void) 1971 { 1972 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 1973 } 1974 1975 TCGv_i64 tcg_temp_ebb_new_i64(void) 1976 { 1977 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 1978 } 1979 1980 TCGv_ptr tcg_temp_new_ptr(void) 1981 { 1982 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 1983 } 1984 1985 TCGv_ptr tcg_temp_ebb_new_ptr(void) 1986 { 1987 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 1988 } 1989 1990 TCGv_i128 tcg_temp_new_i128(void) 1991 { 1992 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 1993 } 1994 1995 TCGv_i128 tcg_temp_ebb_new_i128(void) 1996 { 1997 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 1998 } 1999 2000 TCGv_vec tcg_temp_new_vec(TCGType type) 2001 { 2002 TCGTemp *t; 2003 2004 #ifdef CONFIG_DEBUG_TCG 2005 switch (type) { 2006 case TCG_TYPE_V64: 2007 assert(TCG_TARGET_HAS_v64); 2008 break; 2009 case TCG_TYPE_V128: 2010 assert(TCG_TARGET_HAS_v128); 2011 break; 2012 case TCG_TYPE_V256: 2013 assert(TCG_TARGET_HAS_v256); 2014 break; 2015 default: 2016 g_assert_not_reached(); 2017 } 2018 #endif 2019 2020 t = tcg_temp_new_internal(type, TEMP_EBB); 2021 return temp_tcgv_vec(t); 2022 } 2023 2024 /* Create a new temp of the same type as an existing temp. */ 2025 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2026 { 2027 TCGTemp *t = tcgv_vec_temp(match); 2028 2029 tcg_debug_assert(t->temp_allocated != 0); 2030 2031 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2032 return temp_tcgv_vec(t); 2033 } 2034 2035 void tcg_temp_free_internal(TCGTemp *ts) 2036 { 2037 TCGContext *s = tcg_ctx; 2038 2039 switch (ts->kind) { 2040 case TEMP_CONST: 2041 case TEMP_TB: 2042 /* Silently ignore free. */ 2043 break; 2044 case TEMP_EBB: 2045 tcg_debug_assert(ts->temp_allocated != 0); 2046 ts->temp_allocated = 0; 2047 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2048 break; 2049 default: 2050 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
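   Both kinds live for as long as the TCGContext itself, so reaching this
   case indicates a bug in the caller.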
*/ 2051 g_assert_not_reached(); 2052 } 2053 } 2054 2055 void tcg_temp_free_i32(TCGv_i32 arg) 2056 { 2057 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2058 } 2059 2060 void tcg_temp_free_i64(TCGv_i64 arg) 2061 { 2062 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2063 } 2064 2065 void tcg_temp_free_i128(TCGv_i128 arg) 2066 { 2067 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2068 } 2069 2070 void tcg_temp_free_ptr(TCGv_ptr arg) 2071 { 2072 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2073 } 2074 2075 void tcg_temp_free_vec(TCGv_vec arg) 2076 { 2077 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2078 } 2079 2080 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2081 { 2082 TCGContext *s = tcg_ctx; 2083 GHashTable *h = s->const_table[type]; 2084 TCGTemp *ts; 2085 2086 if (h == NULL) { 2087 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2088 s->const_table[type] = h; 2089 } 2090 2091 ts = g_hash_table_lookup(h, &val); 2092 if (ts == NULL) { 2093 int64_t *val_ptr; 2094 2095 ts = tcg_temp_alloc(s); 2096 2097 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2098 TCGTemp *ts2 = tcg_temp_alloc(s); 2099 2100 tcg_debug_assert(ts2 == ts + 1); 2101 2102 ts->base_type = TCG_TYPE_I64; 2103 ts->type = TCG_TYPE_I32; 2104 ts->kind = TEMP_CONST; 2105 ts->temp_allocated = 1; 2106 2107 ts2->base_type = TCG_TYPE_I64; 2108 ts2->type = TCG_TYPE_I32; 2109 ts2->kind = TEMP_CONST; 2110 ts2->temp_allocated = 1; 2111 ts2->temp_subindex = 1; 2112 2113 /* 2114 * Retain the full value of the 64-bit constant in the low 2115 * part, so that the hash table works. Actual uses will 2116 * truncate the value to the low part. 2117 */ 2118 ts[HOST_BIG_ENDIAN].val = val; 2119 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2120 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2121 } else { 2122 ts->base_type = type; 2123 ts->type = type; 2124 ts->kind = TEMP_CONST; 2125 ts->temp_allocated = 1; 2126 ts->val = val; 2127 val_ptr = &ts->val; 2128 } 2129 g_hash_table_insert(h, val_ptr, ts); 2130 } 2131 2132 return ts; 2133 } 2134 2135 TCGv_i32 tcg_constant_i32(int32_t val) 2136 { 2137 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2138 } 2139 2140 TCGv_i64 tcg_constant_i64(int64_t val) 2141 { 2142 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2143 } 2144 2145 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2146 { 2147 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2148 } 2149 2150 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2151 { 2152 val = dup_const(vece, val); 2153 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2154 } 2155 2156 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2157 { 2158 TCGTemp *t = tcgv_vec_temp(match); 2159 2160 tcg_debug_assert(t->temp_allocated != 0); 2161 return tcg_constant_vec(t->base_type, vece, val); 2162 } 2163 2164 #ifdef CONFIG_DEBUG_TCG 2165 size_t temp_idx(TCGTemp *ts) 2166 { 2167 ptrdiff_t n = ts - tcg_ctx->temps; 2168 assert(n >= 0 && n < tcg_ctx->nb_temps); 2169 return n; 2170 } 2171 2172 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2173 { 2174 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2175 2176 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2177 assert(o % sizeof(TCGTemp) == 0); 2178 2179 return (void *)tcg_ctx + (uintptr_t)v; 2180 } 2181 #endif /* CONFIG_DEBUG_TCG */ 2182 2183 /* 2184 * Return true if OP may appear in the opcode stream with TYPE. 2185 * Test the runtime variable that controls each opcode. 
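 *
 * A hypothetical usage sketch (not code from this file): an expander that
 * wants an optional opcode can test for it first and otherwise fall back
 * to a helper call, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         ... emit the opcode directly ...
 *     } else {
 *         ... expand via a count-population helper ...
 *     }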
2186 */ 2187 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2188 { 2189 bool has_type; 2190 2191 switch (type) { 2192 case TCG_TYPE_I32: 2193 has_type = true; 2194 break; 2195 case TCG_TYPE_I64: 2196 has_type = TCG_TARGET_REG_BITS == 64; 2197 break; 2198 case TCG_TYPE_V64: 2199 has_type = TCG_TARGET_HAS_v64; 2200 break; 2201 case TCG_TYPE_V128: 2202 has_type = TCG_TARGET_HAS_v128; 2203 break; 2204 case TCG_TYPE_V256: 2205 has_type = TCG_TARGET_HAS_v256; 2206 break; 2207 default: 2208 has_type = false; 2209 break; 2210 } 2211 2212 switch (op) { 2213 case INDEX_op_discard: 2214 case INDEX_op_set_label: 2215 case INDEX_op_call: 2216 case INDEX_op_br: 2217 case INDEX_op_mb: 2218 case INDEX_op_insn_start: 2219 case INDEX_op_exit_tb: 2220 case INDEX_op_goto_tb: 2221 case INDEX_op_goto_ptr: 2222 case INDEX_op_qemu_ld_i32: 2223 case INDEX_op_qemu_st_i32: 2224 case INDEX_op_qemu_ld_i64: 2225 case INDEX_op_qemu_st_i64: 2226 return true; 2227 2228 case INDEX_op_qemu_st8_i32: 2229 return TCG_TARGET_HAS_qemu_st8_i32; 2230 2231 case INDEX_op_qemu_ld_i128: 2232 case INDEX_op_qemu_st_i128: 2233 return TCG_TARGET_HAS_qemu_ldst_i128; 2234 2235 case INDEX_op_add: 2236 case INDEX_op_and: 2237 case INDEX_op_mov: 2238 case INDEX_op_or: 2239 case INDEX_op_xor: 2240 return has_type; 2241 2242 case INDEX_op_setcond_i32: 2243 case INDEX_op_brcond_i32: 2244 case INDEX_op_movcond_i32: 2245 case INDEX_op_ld8u_i32: 2246 case INDEX_op_ld8s_i32: 2247 case INDEX_op_ld16u_i32: 2248 case INDEX_op_ld16s_i32: 2249 case INDEX_op_ld_i32: 2250 case INDEX_op_st8_i32: 2251 case INDEX_op_st16_i32: 2252 case INDEX_op_st_i32: 2253 case INDEX_op_shl_i32: 2254 case INDEX_op_shr_i32: 2255 case INDEX_op_sar_i32: 2256 case INDEX_op_extract_i32: 2257 case INDEX_op_sextract_i32: 2258 case INDEX_op_deposit_i32: 2259 return true; 2260 2261 case INDEX_op_negsetcond_i32: 2262 return TCG_TARGET_HAS_negsetcond_i32; 2263 case INDEX_op_div_i32: 2264 case INDEX_op_divu_i32: 2265 return TCG_TARGET_HAS_div_i32; 2266 case INDEX_op_rem_i32: 2267 case INDEX_op_remu_i32: 2268 return TCG_TARGET_HAS_rem_i32; 2269 case INDEX_op_div2_i32: 2270 case INDEX_op_divu2_i32: 2271 return TCG_TARGET_HAS_div2_i32; 2272 case INDEX_op_rotl_i32: 2273 case INDEX_op_rotr_i32: 2274 return TCG_TARGET_HAS_rot_i32; 2275 case INDEX_op_extract2_i32: 2276 return TCG_TARGET_HAS_extract2_i32; 2277 case INDEX_op_add2_i32: 2278 return TCG_TARGET_HAS_add2_i32; 2279 case INDEX_op_sub2_i32: 2280 return TCG_TARGET_HAS_sub2_i32; 2281 case INDEX_op_mulu2_i32: 2282 return TCG_TARGET_HAS_mulu2_i32; 2283 case INDEX_op_muls2_i32: 2284 return TCG_TARGET_HAS_muls2_i32; 2285 case INDEX_op_bswap16_i32: 2286 return TCG_TARGET_HAS_bswap16_i32; 2287 case INDEX_op_bswap32_i32: 2288 return TCG_TARGET_HAS_bswap32_i32; 2289 case INDEX_op_clz_i32: 2290 return TCG_TARGET_HAS_clz_i32; 2291 case INDEX_op_ctz_i32: 2292 return TCG_TARGET_HAS_ctz_i32; 2293 case INDEX_op_ctpop_i32: 2294 return TCG_TARGET_HAS_ctpop_i32; 2295 2296 case INDEX_op_brcond2_i32: 2297 case INDEX_op_setcond2_i32: 2298 return TCG_TARGET_REG_BITS == 32; 2299 2300 case INDEX_op_setcond_i64: 2301 case INDEX_op_brcond_i64: 2302 case INDEX_op_movcond_i64: 2303 case INDEX_op_ld8u_i64: 2304 case INDEX_op_ld8s_i64: 2305 case INDEX_op_ld16u_i64: 2306 case INDEX_op_ld16s_i64: 2307 case INDEX_op_ld32u_i64: 2308 case INDEX_op_ld32s_i64: 2309 case INDEX_op_ld_i64: 2310 case INDEX_op_st8_i64: 2311 case INDEX_op_st16_i64: 2312 case INDEX_op_st32_i64: 2313 case INDEX_op_st_i64: 2314 case INDEX_op_shl_i64: 2315 case INDEX_op_shr_i64: 
2316 case INDEX_op_sar_i64: 2317 case INDEX_op_ext_i32_i64: 2318 case INDEX_op_extu_i32_i64: 2319 case INDEX_op_extract_i64: 2320 case INDEX_op_sextract_i64: 2321 case INDEX_op_deposit_i64: 2322 return TCG_TARGET_REG_BITS == 64; 2323 2324 case INDEX_op_negsetcond_i64: 2325 return TCG_TARGET_HAS_negsetcond_i64; 2326 case INDEX_op_div_i64: 2327 case INDEX_op_divu_i64: 2328 return TCG_TARGET_HAS_div_i64; 2329 case INDEX_op_rem_i64: 2330 case INDEX_op_remu_i64: 2331 return TCG_TARGET_HAS_rem_i64; 2332 case INDEX_op_div2_i64: 2333 case INDEX_op_divu2_i64: 2334 return TCG_TARGET_HAS_div2_i64; 2335 case INDEX_op_rotl_i64: 2336 case INDEX_op_rotr_i64: 2337 return TCG_TARGET_HAS_rot_i64; 2338 case INDEX_op_extract2_i64: 2339 return TCG_TARGET_HAS_extract2_i64; 2340 case INDEX_op_extrl_i64_i32: 2341 case INDEX_op_extrh_i64_i32: 2342 return TCG_TARGET_HAS_extr_i64_i32; 2343 case INDEX_op_bswap16_i64: 2344 return TCG_TARGET_HAS_bswap16_i64; 2345 case INDEX_op_bswap32_i64: 2346 return TCG_TARGET_HAS_bswap32_i64; 2347 case INDEX_op_bswap64_i64: 2348 return TCG_TARGET_HAS_bswap64_i64; 2349 case INDEX_op_clz_i64: 2350 return TCG_TARGET_HAS_clz_i64; 2351 case INDEX_op_ctz_i64: 2352 return TCG_TARGET_HAS_ctz_i64; 2353 case INDEX_op_ctpop_i64: 2354 return TCG_TARGET_HAS_ctpop_i64; 2355 case INDEX_op_add2_i64: 2356 return TCG_TARGET_HAS_add2_i64; 2357 case INDEX_op_sub2_i64: 2358 return TCG_TARGET_HAS_sub2_i64; 2359 case INDEX_op_mulu2_i64: 2360 return TCG_TARGET_HAS_mulu2_i64; 2361 case INDEX_op_muls2_i64: 2362 return TCG_TARGET_HAS_muls2_i64; 2363 2364 case INDEX_op_mov_vec: 2365 case INDEX_op_dup_vec: 2366 case INDEX_op_dupm_vec: 2367 case INDEX_op_ld_vec: 2368 case INDEX_op_st_vec: 2369 case INDEX_op_add_vec: 2370 case INDEX_op_sub_vec: 2371 case INDEX_op_and_vec: 2372 case INDEX_op_or_vec: 2373 case INDEX_op_xor_vec: 2374 case INDEX_op_cmp_vec: 2375 return has_type; 2376 case INDEX_op_dup2_vec: 2377 return has_type && TCG_TARGET_REG_BITS == 32; 2378 case INDEX_op_not_vec: 2379 return has_type && TCG_TARGET_HAS_not_vec; 2380 case INDEX_op_neg_vec: 2381 return has_type && TCG_TARGET_HAS_neg_vec; 2382 case INDEX_op_abs_vec: 2383 return has_type && TCG_TARGET_HAS_abs_vec; 2384 case INDEX_op_andc_vec: 2385 return has_type && TCG_TARGET_HAS_andc_vec; 2386 case INDEX_op_orc_vec: 2387 return has_type && TCG_TARGET_HAS_orc_vec; 2388 case INDEX_op_nand_vec: 2389 return has_type && TCG_TARGET_HAS_nand_vec; 2390 case INDEX_op_nor_vec: 2391 return has_type && TCG_TARGET_HAS_nor_vec; 2392 case INDEX_op_eqv_vec: 2393 return has_type && TCG_TARGET_HAS_eqv_vec; 2394 case INDEX_op_mul_vec: 2395 return has_type && TCG_TARGET_HAS_mul_vec; 2396 case INDEX_op_shli_vec: 2397 case INDEX_op_shri_vec: 2398 case INDEX_op_sari_vec: 2399 return has_type && TCG_TARGET_HAS_shi_vec; 2400 case INDEX_op_shls_vec: 2401 case INDEX_op_shrs_vec: 2402 case INDEX_op_sars_vec: 2403 return has_type && TCG_TARGET_HAS_shs_vec; 2404 case INDEX_op_shlv_vec: 2405 case INDEX_op_shrv_vec: 2406 case INDEX_op_sarv_vec: 2407 return has_type && TCG_TARGET_HAS_shv_vec; 2408 case INDEX_op_rotli_vec: 2409 return has_type && TCG_TARGET_HAS_roti_vec; 2410 case INDEX_op_rotls_vec: 2411 return has_type && TCG_TARGET_HAS_rots_vec; 2412 case INDEX_op_rotlv_vec: 2413 case INDEX_op_rotrv_vec: 2414 return has_type && TCG_TARGET_HAS_rotv_vec; 2415 case INDEX_op_ssadd_vec: 2416 case INDEX_op_usadd_vec: 2417 case INDEX_op_sssub_vec: 2418 case INDEX_op_ussub_vec: 2419 return has_type && TCG_TARGET_HAS_sat_vec; 2420 case INDEX_op_smin_vec: 2421 case INDEX_op_umin_vec: 2422 
case INDEX_op_smax_vec: 2423 case INDEX_op_umax_vec: 2424 return has_type && TCG_TARGET_HAS_minmax_vec; 2425 case INDEX_op_bitsel_vec: 2426 return has_type && TCG_TARGET_HAS_bitsel_vec; 2427 case INDEX_op_cmpsel_vec: 2428 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2429 2430 default: 2431 if (op < INDEX_op_last_generic) { 2432 const TCGOutOp *outop; 2433 TCGConstraintSetIndex con_set; 2434 2435 if (!has_type) { 2436 return false; 2437 } 2438 2439 outop = all_outop[op]; 2440 tcg_debug_assert(outop != NULL); 2441 2442 con_set = outop->static_constraint; 2443 if (con_set == C_Dynamic) { 2444 con_set = outop->dynamic_constraint(type, flags); 2445 } 2446 if (con_set >= 0) { 2447 return true; 2448 } 2449 tcg_debug_assert(con_set == C_NotImplemented); 2450 return false; 2451 } 2452 tcg_debug_assert(op < NB_OPS); 2453 return true; 2454 2455 case INDEX_op_last_generic: 2456 g_assert_not_reached(); 2457 } 2458 } 2459 2460 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2461 { 2462 unsigned width; 2463 2464 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2465 width = (type == TCG_TYPE_I32 ? 32 : 64); 2466 2467 tcg_debug_assert(ofs < width); 2468 tcg_debug_assert(len > 0); 2469 tcg_debug_assert(len <= width - ofs); 2470 2471 return TCG_TARGET_deposit_valid(type, ofs, len); 2472 } 2473 2474 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2475 2476 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2477 TCGTemp *ret, TCGTemp **args) 2478 { 2479 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2480 int n_extend = 0; 2481 TCGOp *op; 2482 int i, n, pi = 0, total_args; 2483 2484 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2485 init_call_layout(info); 2486 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2487 } 2488 2489 total_args = info->nr_out + info->nr_in + 2; 2490 op = tcg_op_alloc(INDEX_op_call, total_args); 2491 2492 #ifdef CONFIG_PLUGIN 2493 /* Flag helpers that may affect guest state */ 2494 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2495 tcg_ctx->plugin_insn->calls_helpers = true; 2496 } 2497 #endif 2498 2499 TCGOP_CALLO(op) = n = info->nr_out; 2500 switch (n) { 2501 case 0: 2502 tcg_debug_assert(ret == NULL); 2503 break; 2504 case 1: 2505 tcg_debug_assert(ret != NULL); 2506 op->args[pi++] = temp_arg(ret); 2507 break; 2508 case 2: 2509 case 4: 2510 tcg_debug_assert(ret != NULL); 2511 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2512 tcg_debug_assert(ret->temp_subindex == 0); 2513 for (i = 0; i < n; ++i) { 2514 op->args[pi++] = temp_arg(ret + i); 2515 } 2516 break; 2517 default: 2518 g_assert_not_reached(); 2519 } 2520 2521 TCGOP_CALLI(op) = n = info->nr_in; 2522 for (i = 0; i < n; i++) { 2523 const TCGCallArgumentLoc *loc = &info->in[i]; 2524 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2525 2526 switch (loc->kind) { 2527 case TCG_CALL_ARG_NORMAL: 2528 case TCG_CALL_ARG_BY_REF: 2529 case TCG_CALL_ARG_BY_REF_N: 2530 op->args[pi++] = temp_arg(ts); 2531 break; 2532 2533 case TCG_CALL_ARG_EXTEND_U: 2534 case TCG_CALL_ARG_EXTEND_S: 2535 { 2536 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2537 TCGv_i32 orig = temp_tcgv_i32(ts); 2538 2539 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2540 tcg_gen_ext_i32_i64(temp, orig); 2541 } else { 2542 tcg_gen_extu_i32_i64(temp, orig); 2543 } 2544 op->args[pi++] = tcgv_i64_arg(temp); 2545 extend_free[n_extend++] = temp; 2546 } 2547 break; 2548 2549 default: 2550 g_assert_not_reached(); 2551 } 2552 } 2553 op->args[pi++] = (uintptr_t)func; 2554 
op->args[pi++] = (uintptr_t)info; 2555 tcg_debug_assert(pi == total_args); 2556 2557 if (tcg_ctx->emit_before_op) { 2558 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2559 } else { 2560 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2561 } 2562 2563 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2564 for (i = 0; i < n_extend; ++i) { 2565 tcg_temp_free_i64(extend_free[i]); 2566 } 2567 } 2568 2569 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2570 { 2571 tcg_gen_callN(func, info, ret, NULL); 2572 } 2573 2574 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2575 { 2576 tcg_gen_callN(func, info, ret, &t1); 2577 } 2578 2579 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2580 TCGTemp *t1, TCGTemp *t2) 2581 { 2582 TCGTemp *args[2] = { t1, t2 }; 2583 tcg_gen_callN(func, info, ret, args); 2584 } 2585 2586 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2587 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2588 { 2589 TCGTemp *args[3] = { t1, t2, t3 }; 2590 tcg_gen_callN(func, info, ret, args); 2591 } 2592 2593 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, 2594 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2595 { 2596 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2597 tcg_gen_callN(func, info, ret, args); 2598 } 2599 2600 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2601 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2602 { 2603 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2604 tcg_gen_callN(func, info, ret, args); 2605 } 2606 2607 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2608 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2609 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2610 { 2611 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2612 tcg_gen_callN(func, info, ret, args); 2613 } 2614 2615 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2616 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2617 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2618 { 2619 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2620 tcg_gen_callN(func, info, ret, args); 2621 } 2622 2623 static void tcg_reg_alloc_start(TCGContext *s) 2624 { 2625 int i, n; 2626 2627 for (i = 0, n = s->nb_temps; i < n; i++) { 2628 TCGTemp *ts = &s->temps[i]; 2629 TCGTempVal val = TEMP_VAL_MEM; 2630 2631 switch (ts->kind) { 2632 case TEMP_CONST: 2633 val = TEMP_VAL_CONST; 2634 break; 2635 case TEMP_FIXED: 2636 val = TEMP_VAL_REG; 2637 break; 2638 case TEMP_GLOBAL: 2639 break; 2640 case TEMP_EBB: 2641 val = TEMP_VAL_DEAD; 2642 /* fall through */ 2643 case TEMP_TB: 2644 ts->mem_allocated = 0; 2645 break; 2646 default: 2647 g_assert_not_reached(); 2648 } 2649 ts->val_type = val; 2650 } 2651 2652 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2653 } 2654 2655 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2656 TCGTemp *ts) 2657 { 2658 int idx = temp_idx(ts); 2659 2660 switch (ts->kind) { 2661 case TEMP_FIXED: 2662 case TEMP_GLOBAL: 2663 pstrcpy(buf, buf_size, ts->name); 2664 break; 2665 case TEMP_TB: 2666 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2667 break; 2668 case TEMP_EBB: 2669 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2670 break; 2671 case TEMP_CONST: 2672 switch (ts->type) { 2673 case TCG_TYPE_I32: 2674 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2675 break; 2676 #if TCG_TARGET_REG_BITS > 32 2677 case TCG_TYPE_I64: 2678 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2679 break; 2680 #endif 2681 case 
TCG_TYPE_V64: 2682 case TCG_TYPE_V128: 2683 case TCG_TYPE_V256: 2684 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2685 64 << (ts->type - TCG_TYPE_V64), ts->val); 2686 break; 2687 default: 2688 g_assert_not_reached(); 2689 } 2690 break; 2691 } 2692 return buf; 2693 } 2694 2695 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2696 int buf_size, TCGArg arg) 2697 { 2698 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2699 } 2700 2701 static const char * const cond_name[] = 2702 { 2703 [TCG_COND_NEVER] = "never", 2704 [TCG_COND_ALWAYS] = "always", 2705 [TCG_COND_EQ] = "eq", 2706 [TCG_COND_NE] = "ne", 2707 [TCG_COND_LT] = "lt", 2708 [TCG_COND_GE] = "ge", 2709 [TCG_COND_LE] = "le", 2710 [TCG_COND_GT] = "gt", 2711 [TCG_COND_LTU] = "ltu", 2712 [TCG_COND_GEU] = "geu", 2713 [TCG_COND_LEU] = "leu", 2714 [TCG_COND_GTU] = "gtu", 2715 [TCG_COND_TSTEQ] = "tsteq", 2716 [TCG_COND_TSTNE] = "tstne", 2717 }; 2718 2719 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2720 { 2721 [MO_UB] = "ub", 2722 [MO_SB] = "sb", 2723 [MO_LEUW] = "leuw", 2724 [MO_LESW] = "lesw", 2725 [MO_LEUL] = "leul", 2726 [MO_LESL] = "lesl", 2727 [MO_LEUQ] = "leq", 2728 [MO_BEUW] = "beuw", 2729 [MO_BESW] = "besw", 2730 [MO_BEUL] = "beul", 2731 [MO_BESL] = "besl", 2732 [MO_BEUQ] = "beq", 2733 [MO_128 + MO_BE] = "beo", 2734 [MO_128 + MO_LE] = "leo", 2735 }; 2736 2737 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2738 [MO_UNALN >> MO_ASHIFT] = "un+", 2739 [MO_ALIGN >> MO_ASHIFT] = "al+", 2740 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2741 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2742 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2743 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2744 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2745 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2746 }; 2747 2748 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2749 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2750 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2751 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2752 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2753 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2754 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2755 }; 2756 2757 static const char bswap_flag_name[][6] = { 2758 [TCG_BSWAP_IZ] = "iz", 2759 [TCG_BSWAP_OZ] = "oz", 2760 [TCG_BSWAP_OS] = "os", 2761 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2762 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2763 }; 2764 2765 #ifdef CONFIG_PLUGIN 2766 static const char * const plugin_from_name[] = { 2767 "from-tb", 2768 "from-insn", 2769 "after-insn", 2770 "after-tb", 2771 }; 2772 #endif 2773 2774 static inline bool tcg_regset_single(TCGRegSet d) 2775 { 2776 return (d & (d - 1)) == 0; 2777 } 2778 2779 static inline TCGReg tcg_regset_first(TCGRegSet d) 2780 { 2781 if (TCG_TARGET_NB_REGS <= 32) { 2782 return ctz32(d); 2783 } else { 2784 return ctz64(d); 2785 } 2786 } 2787 2788 /* Return only the number of characters output -- no error return. */ 2789 #define ne_fprintf(...) \ 2790 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? 
ret_ : 0; }) 2791 2792 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2793 { 2794 char buf[128]; 2795 TCGOp *op; 2796 2797 QTAILQ_FOREACH(op, &s->ops, link) { 2798 int i, k, nb_oargs, nb_iargs, nb_cargs; 2799 const TCGOpDef *def; 2800 TCGOpcode c; 2801 int col = 0; 2802 2803 c = op->opc; 2804 def = &tcg_op_defs[c]; 2805 2806 if (c == INDEX_op_insn_start) { 2807 nb_oargs = 0; 2808 col += ne_fprintf(f, "\n ----"); 2809 2810 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2811 col += ne_fprintf(f, " %016" PRIx64, 2812 tcg_get_insn_start_param(op, i)); 2813 } 2814 } else if (c == INDEX_op_call) { 2815 const TCGHelperInfo *info = tcg_call_info(op); 2816 void *func = tcg_call_func(op); 2817 2818 /* variable number of arguments */ 2819 nb_oargs = TCGOP_CALLO(op); 2820 nb_iargs = TCGOP_CALLI(op); 2821 nb_cargs = def->nb_cargs; 2822 2823 col += ne_fprintf(f, " %s ", def->name); 2824 2825 /* 2826 * Print the function name from TCGHelperInfo, if available. 2827 * Note that plugins have a template function for the info, 2828 * but the actual function pointer comes from the plugin. 2829 */ 2830 if (func == info->func) { 2831 col += ne_fprintf(f, "%s", info->name); 2832 } else { 2833 col += ne_fprintf(f, "plugin(%p)", func); 2834 } 2835 2836 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2837 for (i = 0; i < nb_oargs; i++) { 2838 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2839 op->args[i])); 2840 } 2841 for (i = 0; i < nb_iargs; i++) { 2842 TCGArg arg = op->args[nb_oargs + i]; 2843 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2844 col += ne_fprintf(f, ",%s", t); 2845 } 2846 } else { 2847 if (def->flags & TCG_OPF_INT) { 2848 col += ne_fprintf(f, " %s_i%d ", 2849 def->name, 2850 8 * tcg_type_size(TCGOP_TYPE(op))); 2851 } else if (def->flags & TCG_OPF_VECTOR) { 2852 col += ne_fprintf(f, "%s v%d,e%d,", 2853 def->name, 2854 8 * tcg_type_size(TCGOP_TYPE(op)), 2855 8 << TCGOP_VECE(op)); 2856 } else { 2857 col += ne_fprintf(f, " %s ", def->name); 2858 } 2859 2860 nb_oargs = def->nb_oargs; 2861 nb_iargs = def->nb_iargs; 2862 nb_cargs = def->nb_cargs; 2863 2864 k = 0; 2865 for (i = 0; i < nb_oargs; i++) { 2866 const char *sep = k ? "," : ""; 2867 col += ne_fprintf(f, "%s%s", sep, 2868 tcg_get_arg_str(s, buf, sizeof(buf), 2869 op->args[k++])); 2870 } 2871 for (i = 0; i < nb_iargs; i++) { 2872 const char *sep = k ? 
"," : ""; 2873 col += ne_fprintf(f, "%s%s", sep, 2874 tcg_get_arg_str(s, buf, sizeof(buf), 2875 op->args[k++])); 2876 } 2877 switch (c) { 2878 case INDEX_op_brcond_i32: 2879 case INDEX_op_setcond_i32: 2880 case INDEX_op_negsetcond_i32: 2881 case INDEX_op_movcond_i32: 2882 case INDEX_op_brcond2_i32: 2883 case INDEX_op_setcond2_i32: 2884 case INDEX_op_brcond_i64: 2885 case INDEX_op_setcond_i64: 2886 case INDEX_op_negsetcond_i64: 2887 case INDEX_op_movcond_i64: 2888 case INDEX_op_cmp_vec: 2889 case INDEX_op_cmpsel_vec: 2890 if (op->args[k] < ARRAY_SIZE(cond_name) 2891 && cond_name[op->args[k]]) { 2892 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2893 } else { 2894 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2895 } 2896 i = 1; 2897 break; 2898 case INDEX_op_qemu_ld_i32: 2899 case INDEX_op_qemu_st_i32: 2900 case INDEX_op_qemu_st8_i32: 2901 case INDEX_op_qemu_ld_i64: 2902 case INDEX_op_qemu_st_i64: 2903 case INDEX_op_qemu_ld_i128: 2904 case INDEX_op_qemu_st_i128: 2905 { 2906 const char *s_al, *s_op, *s_at; 2907 MemOpIdx oi = op->args[k++]; 2908 MemOp mop = get_memop(oi); 2909 unsigned ix = get_mmuidx(oi); 2910 2911 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2912 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2913 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2914 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2915 2916 /* If all fields are accounted for, print symbolically. */ 2917 if (!mop && s_al && s_op && s_at) { 2918 col += ne_fprintf(f, ",%s%s%s,%u", 2919 s_at, s_al, s_op, ix); 2920 } else { 2921 mop = get_memop(oi); 2922 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2923 } 2924 i = 1; 2925 } 2926 break; 2927 case INDEX_op_bswap16_i32: 2928 case INDEX_op_bswap16_i64: 2929 case INDEX_op_bswap32_i32: 2930 case INDEX_op_bswap32_i64: 2931 case INDEX_op_bswap64_i64: 2932 { 2933 TCGArg flags = op->args[k]; 2934 const char *name = NULL; 2935 2936 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2937 name = bswap_flag_name[flags]; 2938 } 2939 if (name) { 2940 col += ne_fprintf(f, ",%s", name); 2941 } else { 2942 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2943 } 2944 i = k = 1; 2945 } 2946 break; 2947 #ifdef CONFIG_PLUGIN 2948 case INDEX_op_plugin_cb: 2949 { 2950 TCGArg from = op->args[k++]; 2951 const char *name = NULL; 2952 2953 if (from < ARRAY_SIZE(plugin_from_name)) { 2954 name = plugin_from_name[from]; 2955 } 2956 if (name) { 2957 col += ne_fprintf(f, "%s", name); 2958 } else { 2959 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2960 } 2961 i = 1; 2962 } 2963 break; 2964 #endif 2965 default: 2966 i = 0; 2967 break; 2968 } 2969 switch (c) { 2970 case INDEX_op_set_label: 2971 case INDEX_op_br: 2972 case INDEX_op_brcond_i32: 2973 case INDEX_op_brcond_i64: 2974 case INDEX_op_brcond2_i32: 2975 col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", 2976 arg_label(op->args[k])->id); 2977 i++, k++; 2978 break; 2979 case INDEX_op_mb: 2980 { 2981 TCGBar membar = op->args[k]; 2982 const char *b_op, *m_op; 2983 2984 switch (membar & TCG_BAR_SC) { 2985 case 0: 2986 b_op = "none"; 2987 break; 2988 case TCG_BAR_LDAQ: 2989 b_op = "acq"; 2990 break; 2991 case TCG_BAR_STRL: 2992 b_op = "rel"; 2993 break; 2994 case TCG_BAR_SC: 2995 b_op = "seq"; 2996 break; 2997 default: 2998 g_assert_not_reached(); 2999 } 3000 3001 switch (membar & TCG_MO_ALL) { 3002 case 0: 3003 m_op = "none"; 3004 break; 3005 case TCG_MO_LD_LD: 3006 m_op = "rr"; 3007 break; 3008 case TCG_MO_LD_ST: 3009 m_op = "rw"; 3010 break; 3011 case TCG_MO_ST_LD: 3012 m_op = "wr"; 3013 break; 3014 case TCG_MO_ST_ST: 3015 m_op = "ww"; 3016 break; 3017 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3018 m_op = "rr+rw"; 3019 break; 3020 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3021 m_op = "rr+wr"; 3022 break; 3023 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3024 m_op = "rr+ww"; 3025 break; 3026 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3027 m_op = "rw+wr"; 3028 break; 3029 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3030 m_op = "rw+ww"; 3031 break; 3032 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3033 m_op = "wr+ww"; 3034 break; 3035 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3036 m_op = "rr+rw+wr"; 3037 break; 3038 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3039 m_op = "rr+rw+ww"; 3040 break; 3041 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3042 m_op = "rr+wr+ww"; 3043 break; 3044 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3045 m_op = "rw+wr+ww"; 3046 break; 3047 case TCG_MO_ALL: 3048 m_op = "all"; 3049 break; 3050 default: 3051 g_assert_not_reached(); 3052 } 3053 3054 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3055 i++, k++; 3056 } 3057 break; 3058 default: 3059 break; 3060 } 3061 for (; i < nb_cargs; i++, k++) { 3062 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3063 op->args[k]); 3064 } 3065 } 3066 3067 if (have_prefs || op->life) { 3068 for (; col < 40; ++col) { 3069 putc(' ', f); 3070 } 3071 } 3072 3073 if (op->life) { 3074 unsigned life = op->life; 3075 3076 if (life & (SYNC_ARG * 3)) { 3077 ne_fprintf(f, " sync:"); 3078 for (i = 0; i < 2; ++i) { 3079 if (life & (SYNC_ARG << i)) { 3080 ne_fprintf(f, " %d", i); 3081 } 3082 } 3083 } 3084 life /= DEAD_ARG; 3085 if (life) { 3086 ne_fprintf(f, " dead:"); 3087 for (i = 0; life; ++i, life >>= 1) { 3088 if (life & 1) { 3089 ne_fprintf(f, " %d", i); 3090 } 3091 } 3092 } 3093 } 3094 3095 if (have_prefs) { 3096 for (i = 0; i < nb_oargs; ++i) { 3097 TCGRegSet set = output_pref(op, i); 3098 3099 if (i == 0) { 3100 ne_fprintf(f, " pref="); 3101 } else { 3102 ne_fprintf(f, ","); 3103 } 3104 if (set == 0) { 3105 ne_fprintf(f, "none"); 3106 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3107 ne_fprintf(f, "all"); 3108 #ifdef CONFIG_DEBUG_TCG 3109 } else if (tcg_regset_single(set)) { 3110 TCGReg reg = tcg_regset_first(set); 3111 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3112 #endif 3113 } else if (TCG_TARGET_NB_REGS <= 32) { 3114 ne_fprintf(f, "0x%x", (uint32_t)set); 3115 } else { 3116 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3117 } 3118 } 3119 } 3120 3121 putc('\n', f); 3122 } 3123 } 3124 3125 /* we give more priority to constraints with less registers */ 3126 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3127 { 3128 int n; 3129 3130 arg_ct += k; 3131 n = ctpop64(arg_ct->regs); 3132 3133 /* 3134 * Sort constraints of a single register first, which includes output 3135 * aliases (which must exactly match the input already allocated). 3136 */ 3137 if (n == 1 || arg_ct->oalias) { 3138 return INT_MAX; 3139 } 3140 3141 /* 3142 * Sort register pairs next, first then second immediately after. 3143 * Arbitrarily sort multiple pairs by the index of the first reg; 3144 * there shouldn't be many pairs. 3145 */ 3146 switch (arg_ct->pair) { 3147 case 1: 3148 case 3: 3149 return (k + 1) * 2; 3150 case 2: 3151 return (arg_ct->pair_index + 1) * 2 - 1; 3152 } 3153 3154 /* Finally, sort by decreasing register count. 
*/ 3155 assert(n > 1); 3156 return -n; 3157 } 3158 3159 /* sort from highest priority to lowest */ 3160 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3161 { 3162 int i, j; 3163 3164 for (i = 0; i < n; i++) { 3165 a[start + i].sort_index = start + i; 3166 } 3167 if (n <= 1) { 3168 return; 3169 } 3170 for (i = 0; i < n - 1; i++) { 3171 for (j = i + 1; j < n; j++) { 3172 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3173 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3174 if (p1 < p2) { 3175 int tmp = a[start + i].sort_index; 3176 a[start + i].sort_index = a[start + j].sort_index; 3177 a[start + j].sort_index = tmp; 3178 } 3179 } 3180 } 3181 } 3182 3183 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3184 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3185 3186 static void process_constraint_sets(void) 3187 { 3188 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3189 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3190 TCGArgConstraint *args_ct = all_cts[c]; 3191 int nb_oargs = tdefs->nb_oargs; 3192 int nb_iargs = tdefs->nb_iargs; 3193 int nb_args = nb_oargs + nb_iargs; 3194 bool saw_alias_pair = false; 3195 3196 for (int i = 0; i < nb_args; i++) { 3197 const char *ct_str = tdefs->args_ct_str[i]; 3198 bool input_p = i >= nb_oargs; 3199 int o; 3200 3201 switch (*ct_str) { 3202 case '0' ... '9': 3203 o = *ct_str - '0'; 3204 tcg_debug_assert(input_p); 3205 tcg_debug_assert(o < nb_oargs); 3206 tcg_debug_assert(args_ct[o].regs != 0); 3207 tcg_debug_assert(!args_ct[o].oalias); 3208 args_ct[i] = args_ct[o]; 3209 /* The output sets oalias. */ 3210 args_ct[o].oalias = 1; 3211 args_ct[o].alias_index = i; 3212 /* The input sets ialias. */ 3213 args_ct[i].ialias = 1; 3214 args_ct[i].alias_index = o; 3215 if (args_ct[i].pair) { 3216 saw_alias_pair = true; 3217 } 3218 tcg_debug_assert(ct_str[1] == '\0'); 3219 continue; 3220 3221 case '&': 3222 tcg_debug_assert(!input_p); 3223 args_ct[i].newreg = true; 3224 ct_str++; 3225 break; 3226 3227 case 'p': /* plus */ 3228 /* Allocate to the register after the previous. */ 3229 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3230 o = i - 1; 3231 tcg_debug_assert(!args_ct[o].pair); 3232 tcg_debug_assert(!args_ct[o].ct); 3233 args_ct[i] = (TCGArgConstraint){ 3234 .pair = 2, 3235 .pair_index = o, 3236 .regs = args_ct[o].regs << 1, 3237 .newreg = args_ct[o].newreg, 3238 }; 3239 args_ct[o].pair = 1; 3240 args_ct[o].pair_index = i; 3241 tcg_debug_assert(ct_str[1] == '\0'); 3242 continue; 3243 3244 case 'm': /* minus */ 3245 /* Allocate to the register before the previous. */ 3246 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3247 o = i - 1; 3248 tcg_debug_assert(!args_ct[o].pair); 3249 tcg_debug_assert(!args_ct[o].ct); 3250 args_ct[i] = (TCGArgConstraint){ 3251 .pair = 1, 3252 .pair_index = o, 3253 .regs = args_ct[o].regs >> 1, 3254 .newreg = args_ct[o].newreg, 3255 }; 3256 args_ct[o].pair = 2; 3257 args_ct[o].pair_index = i; 3258 tcg_debug_assert(ct_str[1] == '\0'); 3259 continue; 3260 } 3261 3262 do { 3263 switch (*ct_str) { 3264 case 'i': 3265 args_ct[i].ct |= TCG_CT_CONST; 3266 break; 3267 #ifdef TCG_REG_ZERO 3268 case 'z': 3269 args_ct[i].ct |= TCG_CT_REG_ZERO; 3270 break; 3271 #endif 3272 3273 /* Include all of the target-specific constraints. 
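   Each single-letter constraint in tcg-target-con-str.h expands below into
   either a constant-operand flag (CONST) or a backend register set (REGS).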
*/ 3274 3275 #undef CONST 3276 #define CONST(CASE, MASK) \ 3277 case CASE: args_ct[i].ct |= MASK; break; 3278 #define REGS(CASE, MASK) \ 3279 case CASE: args_ct[i].regs |= MASK; break; 3280 3281 #include "tcg-target-con-str.h" 3282 3283 #undef REGS 3284 #undef CONST 3285 default: 3286 case '0' ... '9': 3287 case '&': 3288 case 'p': 3289 case 'm': 3290 /* Typo in TCGConstraintSet constraint. */ 3291 g_assert_not_reached(); 3292 } 3293 } while (*++ct_str != '\0'); 3294 } 3295 3296 /* 3297 * Fix up output pairs that are aliased with inputs. 3298 * When we created the alias, we copied pair from the output. 3299 * There are three cases: 3300 * (1a) Pairs of inputs alias pairs of outputs. 3301 * (1b) One input aliases the first of a pair of outputs. 3302 * (2) One input aliases the second of a pair of outputs. 3303 * 3304 * Case 1a is handled by making sure that the pair_index'es are 3305 * properly updated so that they appear the same as a pair of inputs. 3306 * 3307 * Case 1b is handled by setting the pair_index of the input to 3308 * itself, simply so it doesn't point to an unrelated argument. 3309 * Since we don't encounter the "second" during the input allocation 3310 * phase, nothing happens with the second half of the input pair. 3311 * 3312 * Case 2 is handled by setting the second input to pair=3, the 3313 * first output to pair=3, and the pair_index'es to match. 3314 */ 3315 if (saw_alias_pair) { 3316 for (int i = nb_oargs; i < nb_args; i++) { 3317 int o, o2, i2; 3318 3319 /* 3320 * Since [0-9pm] must be alone in the constraint string, 3321 * the only way they can both be set is if the pair comes 3322 * from the output alias. 3323 */ 3324 if (!args_ct[i].ialias) { 3325 continue; 3326 } 3327 switch (args_ct[i].pair) { 3328 case 0: 3329 break; 3330 case 1: 3331 o = args_ct[i].alias_index; 3332 o2 = args_ct[o].pair_index; 3333 tcg_debug_assert(args_ct[o].pair == 1); 3334 tcg_debug_assert(args_ct[o2].pair == 2); 3335 if (args_ct[o2].oalias) { 3336 /* Case 1a */ 3337 i2 = args_ct[o2].alias_index; 3338 tcg_debug_assert(args_ct[i2].pair == 2); 3339 args_ct[i2].pair_index = i; 3340 args_ct[i].pair_index = i2; 3341 } else { 3342 /* Case 1b */ 3343 args_ct[i].pair_index = i; 3344 } 3345 break; 3346 case 2: 3347 o = args_ct[i].alias_index; 3348 o2 = args_ct[o].pair_index; 3349 tcg_debug_assert(args_ct[o].pair == 2); 3350 tcg_debug_assert(args_ct[o2].pair == 1); 3351 if (args_ct[o2].oalias) { 3352 /* Case 1a */ 3353 i2 = args_ct[o2].alias_index; 3354 tcg_debug_assert(args_ct[i2].pair == 1); 3355 args_ct[i2].pair_index = i; 3356 args_ct[i].pair_index = i2; 3357 } else { 3358 /* Case 2 */ 3359 args_ct[i].pair = 3; 3360 args_ct[o2].pair = 3; 3361 args_ct[i].pair_index = o2; 3362 args_ct[o2].pair_index = i; 3363 } 3364 break; 3365 default: 3366 g_assert_not_reached(); 3367 } 3368 } 3369 } 3370 3371 /* sort the constraints (XXX: this is just an heuristic) */ 3372 sort_constraints(args_ct, 0, nb_oargs); 3373 sort_constraints(args_ct, nb_oargs, nb_iargs); 3374 } 3375 } 3376 3377 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3378 { 3379 TCGOpcode opc = op->opc; 3380 TCGType type = TCGOP_TYPE(op); 3381 unsigned flags = TCGOP_FLAGS(op); 3382 const TCGOpDef *def = &tcg_op_defs[opc]; 3383 const TCGOutOp *outop = all_outop[opc]; 3384 TCGConstraintSetIndex con_set; 3385 3386 if (def->flags & TCG_OPF_NOT_PRESENT) { 3387 return empty_cts; 3388 } 3389 3390 if (outop) { 3391 con_set = outop->static_constraint; 3392 if (con_set == C_Dynamic) { 3393 con_set = outop->dynamic_constraint(type, flags); 
3394 } 3395 } else { 3396 con_set = tcg_target_op_def(opc, type, flags); 3397 } 3398 tcg_debug_assert(con_set >= 0); 3399 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3400 3401 /* The constraint arguments must match TCGOpcode arguments. */ 3402 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3403 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3404 3405 return all_cts[con_set]; 3406 } 3407 3408 static void remove_label_use(TCGOp *op, int idx) 3409 { 3410 TCGLabel *label = arg_label(op->args[idx]); 3411 TCGLabelUse *use; 3412 3413 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3414 if (use->op == op) { 3415 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3416 return; 3417 } 3418 } 3419 g_assert_not_reached(); 3420 } 3421 3422 void tcg_op_remove(TCGContext *s, TCGOp *op) 3423 { 3424 switch (op->opc) { 3425 case INDEX_op_br: 3426 remove_label_use(op, 0); 3427 break; 3428 case INDEX_op_brcond_i32: 3429 case INDEX_op_brcond_i64: 3430 remove_label_use(op, 3); 3431 break; 3432 case INDEX_op_brcond2_i32: 3433 remove_label_use(op, 5); 3434 break; 3435 default: 3436 break; 3437 } 3438 3439 QTAILQ_REMOVE(&s->ops, op, link); 3440 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3441 s->nb_ops--; 3442 } 3443 3444 void tcg_remove_ops_after(TCGOp *op) 3445 { 3446 TCGContext *s = tcg_ctx; 3447 3448 while (true) { 3449 TCGOp *last = tcg_last_op(); 3450 if (last == op) { 3451 return; 3452 } 3453 tcg_op_remove(s, last); 3454 } 3455 } 3456 3457 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3458 { 3459 TCGContext *s = tcg_ctx; 3460 TCGOp *op = NULL; 3461 3462 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3463 QTAILQ_FOREACH(op, &s->free_ops, link) { 3464 if (nargs <= op->nargs) { 3465 QTAILQ_REMOVE(&s->free_ops, op, link); 3466 nargs = op->nargs; 3467 goto found; 3468 } 3469 } 3470 } 3471 3472 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3473 nargs = MAX(4, nargs); 3474 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3475 3476 found: 3477 memset(op, 0, offsetof(TCGOp, link)); 3478 op->opc = opc; 3479 op->nargs = nargs; 3480 3481 /* Check for bitfield overflow. 
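   op->nargs is a narrow bitfield, so comparing it against the requested
   count below catches a silently truncated value.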
*/ 3482 tcg_debug_assert(op->nargs == nargs); 3483 3484 s->nb_ops++; 3485 return op; 3486 } 3487 3488 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3489 { 3490 TCGOp *op = tcg_op_alloc(opc, nargs); 3491 3492 if (tcg_ctx->emit_before_op) { 3493 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3494 } else { 3495 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3496 } 3497 return op; 3498 } 3499 3500 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3501 TCGOpcode opc, TCGType type, unsigned nargs) 3502 { 3503 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3504 3505 TCGOP_TYPE(new_op) = type; 3506 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3507 return new_op; 3508 } 3509 3510 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3511 TCGOpcode opc, TCGType type, unsigned nargs) 3512 { 3513 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3514 3515 TCGOP_TYPE(new_op) = type; 3516 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3517 return new_op; 3518 } 3519 3520 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3521 { 3522 TCGLabelUse *u; 3523 3524 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3525 TCGOp *op = u->op; 3526 switch (op->opc) { 3527 case INDEX_op_br: 3528 op->args[0] = label_arg(to); 3529 break; 3530 case INDEX_op_brcond_i32: 3531 case INDEX_op_brcond_i64: 3532 op->args[3] = label_arg(to); 3533 break; 3534 case INDEX_op_brcond2_i32: 3535 op->args[5] = label_arg(to); 3536 break; 3537 default: 3538 g_assert_not_reached(); 3539 } 3540 } 3541 3542 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3543 } 3544 3545 /* Reachable analysis : remove unreachable code. */ 3546 static void __attribute__((noinline)) 3547 reachable_code_pass(TCGContext *s) 3548 { 3549 TCGOp *op, *op_next, *op_prev; 3550 bool dead = false; 3551 3552 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3553 bool remove = dead; 3554 TCGLabel *label; 3555 3556 switch (op->opc) { 3557 case INDEX_op_set_label: 3558 label = arg_label(op->args[0]); 3559 3560 /* 3561 * Note that the first op in the TB is always a load, 3562 * so there is always something before a label. 3563 */ 3564 op_prev = QTAILQ_PREV(op, link); 3565 3566 /* 3567 * If we find two sequential labels, move all branches to 3568 * reference the second label and remove the first label. 3569 * Do this before branch to next optimization, so that the 3570 * middle label is out of the way. 3571 */ 3572 if (op_prev->opc == INDEX_op_set_label) { 3573 move_label_uses(label, arg_label(op_prev->args[0])); 3574 tcg_op_remove(s, op_prev); 3575 op_prev = QTAILQ_PREV(op, link); 3576 } 3577 3578 /* 3579 * Optimization can fold conditional branches to unconditional. 3580 * If we find a label which is preceded by an unconditional 3581 * branch to next, remove the branch. We couldn't do this when 3582 * processing the branch because any dead code between the branch 3583 * and label had not yet been removed. 3584 */ 3585 if (op_prev->opc == INDEX_op_br && 3586 label == arg_label(op_prev->args[0])) { 3587 tcg_op_remove(s, op_prev); 3588 /* Fall through means insns become live again. */ 3589 dead = false; 3590 } 3591 3592 if (QSIMPLEQ_EMPTY(&label->branches)) { 3593 /* 3594 * While there is an occasional backward branch, virtually 3595 * all branches generated by the translators are forward. 3596 * Which means that generally we will have already removed 3597 * all references to the label that will be, and there is 3598 * little to be gained by iterating. 3599 */ 3600 remove = true; 3601 } else { 3602 /* Once we see a label, insns become live again. 
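   The label is still referenced by at least one branch, so the code that
   follows it cannot be discarded.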
*/ 3603 dead = false; 3604 remove = false; 3605 } 3606 break; 3607 3608 case INDEX_op_br: 3609 case INDEX_op_exit_tb: 3610 case INDEX_op_goto_ptr: 3611 /* Unconditional branches; everything following is dead. */ 3612 dead = true; 3613 break; 3614 3615 case INDEX_op_call: 3616 /* Notice noreturn helper calls, raising exceptions. */ 3617 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3618 dead = true; 3619 } 3620 break; 3621 3622 case INDEX_op_insn_start: 3623 /* Never remove -- we need to keep these for unwind. */ 3624 remove = false; 3625 break; 3626 3627 default: 3628 break; 3629 } 3630 3631 if (remove) { 3632 tcg_op_remove(s, op); 3633 } 3634 } 3635 } 3636 3637 #define TS_DEAD 1 3638 #define TS_MEM 2 3639 3640 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3641 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3642 3643 /* For liveness_pass_1, the register preferences for a given temp. */ 3644 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3645 { 3646 return ts->state_ptr; 3647 } 3648 3649 /* For liveness_pass_1, reset the preferences for a given temp to the 3650 * maximal regset for its type. 3651 */ 3652 static inline void la_reset_pref(TCGTemp *ts) 3653 { 3654 *la_temp_pref(ts) 3655 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3656 } 3657 3658 /* liveness analysis: end of function: all temps are dead, and globals 3659 should be in memory. */ 3660 static void la_func_end(TCGContext *s, int ng, int nt) 3661 { 3662 int i; 3663 3664 for (i = 0; i < ng; ++i) { 3665 s->temps[i].state = TS_DEAD | TS_MEM; 3666 la_reset_pref(&s->temps[i]); 3667 } 3668 for (i = ng; i < nt; ++i) { 3669 s->temps[i].state = TS_DEAD; 3670 la_reset_pref(&s->temps[i]); 3671 } 3672 } 3673 3674 /* liveness analysis: end of basic block: all temps are dead, globals 3675 and local temps should be in memory. */ 3676 static void la_bb_end(TCGContext *s, int ng, int nt) 3677 { 3678 int i; 3679 3680 for (i = 0; i < nt; ++i) { 3681 TCGTemp *ts = &s->temps[i]; 3682 int state; 3683 3684 switch (ts->kind) { 3685 case TEMP_FIXED: 3686 case TEMP_GLOBAL: 3687 case TEMP_TB: 3688 state = TS_DEAD | TS_MEM; 3689 break; 3690 case TEMP_EBB: 3691 case TEMP_CONST: 3692 state = TS_DEAD; 3693 break; 3694 default: 3695 g_assert_not_reached(); 3696 } 3697 ts->state = state; 3698 la_reset_pref(ts); 3699 } 3700 } 3701 3702 /* liveness analysis: sync globals back to memory. */ 3703 static void la_global_sync(TCGContext *s, int ng) 3704 { 3705 int i; 3706 3707 for (i = 0; i < ng; ++i) { 3708 int state = s->temps[i].state; 3709 s->temps[i].state = state | TS_MEM; 3710 if (state == TS_DEAD) { 3711 /* If the global was previously dead, reset prefs. */ 3712 la_reset_pref(&s->temps[i]); 3713 } 3714 } 3715 } 3716 3717 /* 3718 * liveness analysis: conditional branch: all temps are dead unless 3719 * explicitly live-across-conditional-branch, globals and local temps 3720 * should be synced. 3721 */ 3722 static void la_bb_sync(TCGContext *s, int ng, int nt) 3723 { 3724 la_global_sync(s, ng); 3725 3726 for (int i = ng; i < nt; ++i) { 3727 TCGTemp *ts = &s->temps[i]; 3728 int state; 3729 3730 switch (ts->kind) { 3731 case TEMP_TB: 3732 state = ts->state; 3733 ts->state = state | TS_MEM; 3734 if (state != TS_DEAD) { 3735 continue; 3736 } 3737 break; 3738 case TEMP_EBB: 3739 case TEMP_CONST: 3740 continue; 3741 default: 3742 g_assert_not_reached(); 3743 } 3744 la_reset_pref(&s->temps[i]); 3745 } 3746 } 3747 3748 /* liveness analysis: sync globals back to memory and kill. 
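   Used for helper calls that may both read and modify globals, i.e. those
   declaring neither TCG_CALL_NO_READ_GLOBALS nor TCG_CALL_NO_WRITE_GLOBALS.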
*/ 3749 static void la_global_kill(TCGContext *s, int ng) 3750 { 3751 int i; 3752 3753 for (i = 0; i < ng; i++) { 3754 s->temps[i].state = TS_DEAD | TS_MEM; 3755 la_reset_pref(&s->temps[i]); 3756 } 3757 } 3758 3759 /* liveness analysis: note live globals crossing calls. */ 3760 static void la_cross_call(TCGContext *s, int nt) 3761 { 3762 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3763 int i; 3764 3765 for (i = 0; i < nt; i++) { 3766 TCGTemp *ts = &s->temps[i]; 3767 if (!(ts->state & TS_DEAD)) { 3768 TCGRegSet *pset = la_temp_pref(ts); 3769 TCGRegSet set = *pset; 3770 3771 set &= mask; 3772 /* If the combination is not possible, restart. */ 3773 if (set == 0) { 3774 set = tcg_target_available_regs[ts->type] & mask; 3775 } 3776 *pset = set; 3777 } 3778 } 3779 } 3780 3781 /* 3782 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3783 * to TEMP_EBB, if possible. 3784 */ 3785 static void __attribute__((noinline)) 3786 liveness_pass_0(TCGContext *s) 3787 { 3788 void * const multiple_ebb = (void *)(uintptr_t)-1; 3789 int nb_temps = s->nb_temps; 3790 TCGOp *op, *ebb; 3791 3792 for (int i = s->nb_globals; i < nb_temps; ++i) { 3793 s->temps[i].state_ptr = NULL; 3794 } 3795 3796 /* 3797 * Represent each EBB by the op at which it begins. In the case of 3798 * the first EBB, this is the first op, otherwise it is a label. 3799 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3800 * within a single EBB, else MULTIPLE_EBB. 3801 */ 3802 ebb = QTAILQ_FIRST(&s->ops); 3803 QTAILQ_FOREACH(op, &s->ops, link) { 3804 const TCGOpDef *def; 3805 int nb_oargs, nb_iargs; 3806 3807 switch (op->opc) { 3808 case INDEX_op_set_label: 3809 ebb = op; 3810 continue; 3811 case INDEX_op_discard: 3812 continue; 3813 case INDEX_op_call: 3814 nb_oargs = TCGOP_CALLO(op); 3815 nb_iargs = TCGOP_CALLI(op); 3816 break; 3817 default: 3818 def = &tcg_op_defs[op->opc]; 3819 nb_oargs = def->nb_oargs; 3820 nb_iargs = def->nb_iargs; 3821 break; 3822 } 3823 3824 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3825 TCGTemp *ts = arg_temp(op->args[i]); 3826 3827 if (ts->kind != TEMP_TB) { 3828 continue; 3829 } 3830 if (ts->state_ptr == NULL) { 3831 ts->state_ptr = ebb; 3832 } else if (ts->state_ptr != ebb) { 3833 ts->state_ptr = multiple_ebb; 3834 } 3835 } 3836 } 3837 3838 /* 3839 * For TEMP_TB that turned out not to be used beyond one EBB, 3840 * reduce the liveness to TEMP_EBB. 3841 */ 3842 for (int i = s->nb_globals; i < nb_temps; ++i) { 3843 TCGTemp *ts = &s->temps[i]; 3844 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3845 ts->kind = TEMP_EBB; 3846 } 3847 } 3848 } 3849 3850 /* Liveness analysis : update the opc_arg_life array to tell if a 3851 given input arguments is dead. Instructions updating dead 3852 temporaries are removed. */ 3853 static void __attribute__((noinline)) 3854 liveness_pass_1(TCGContext *s) 3855 { 3856 int nb_globals = s->nb_globals; 3857 int nb_temps = s->nb_temps; 3858 TCGOp *op, *op_prev; 3859 TCGRegSet *prefs; 3860 int i; 3861 3862 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3863 for (i = 0; i < nb_temps; ++i) { 3864 s->temps[i].state_ptr = prefs + i; 3865 } 3866 3867 /* ??? Should be redundant with the exit_tb that ends the TB. 
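   For now, seed the backward walk with every temp dead and every global
   saved back to memory.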
*/ 3868 la_func_end(s, nb_globals, nb_temps); 3869 3870 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3871 int nb_iargs, nb_oargs; 3872 TCGOpcode opc_new, opc_new2; 3873 TCGLifeData arg_life = 0; 3874 TCGTemp *ts; 3875 TCGOpcode opc = op->opc; 3876 const TCGOpDef *def = &tcg_op_defs[opc]; 3877 const TCGArgConstraint *args_ct; 3878 3879 switch (opc) { 3880 case INDEX_op_call: 3881 { 3882 const TCGHelperInfo *info = tcg_call_info(op); 3883 int call_flags = tcg_call_flags(op); 3884 3885 nb_oargs = TCGOP_CALLO(op); 3886 nb_iargs = TCGOP_CALLI(op); 3887 3888 /* pure functions can be removed if their result is unused */ 3889 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3890 for (i = 0; i < nb_oargs; i++) { 3891 ts = arg_temp(op->args[i]); 3892 if (ts->state != TS_DEAD) { 3893 goto do_not_remove_call; 3894 } 3895 } 3896 goto do_remove; 3897 } 3898 do_not_remove_call: 3899 3900 /* Output args are dead. */ 3901 for (i = 0; i < nb_oargs; i++) { 3902 ts = arg_temp(op->args[i]); 3903 if (ts->state & TS_DEAD) { 3904 arg_life |= DEAD_ARG << i; 3905 } 3906 if (ts->state & TS_MEM) { 3907 arg_life |= SYNC_ARG << i; 3908 } 3909 ts->state = TS_DEAD; 3910 la_reset_pref(ts); 3911 } 3912 3913 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3914 memset(op->output_pref, 0, sizeof(op->output_pref)); 3915 3916 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3917 TCG_CALL_NO_READ_GLOBALS))) { 3918 la_global_kill(s, nb_globals); 3919 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3920 la_global_sync(s, nb_globals); 3921 } 3922 3923 /* Record arguments that die in this helper. */ 3924 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3925 ts = arg_temp(op->args[i]); 3926 if (ts->state & TS_DEAD) { 3927 arg_life |= DEAD_ARG << i; 3928 } 3929 } 3930 3931 /* For all live registers, remove call-clobbered prefs. */ 3932 la_cross_call(s, nb_temps); 3933 3934 /* 3935 * Input arguments are live for preceding opcodes. 3936 * 3937 * For those arguments that die, and will be allocated in 3938 * registers, clear the register set for that arg, to be 3939 * filled in below. For args that will be on the stack, 3940 * reset to any available reg. Process arguments in reverse 3941 * order so that if a temp is used more than once, the stack 3942 * reset to max happens before the register reset to 0. 3943 */ 3944 for (i = nb_iargs - 1; i >= 0; i--) { 3945 const TCGCallArgumentLoc *loc = &info->in[i]; 3946 ts = arg_temp(op->args[nb_oargs + i]); 3947 3948 if (ts->state & TS_DEAD) { 3949 switch (loc->kind) { 3950 case TCG_CALL_ARG_NORMAL: 3951 case TCG_CALL_ARG_EXTEND_U: 3952 case TCG_CALL_ARG_EXTEND_S: 3953 if (arg_slot_reg_p(loc->arg_slot)) { 3954 *la_temp_pref(ts) = 0; 3955 break; 3956 } 3957 /* fall through */ 3958 default: 3959 *la_temp_pref(ts) = 3960 tcg_target_available_regs[ts->type]; 3961 break; 3962 } 3963 ts->state &= ~TS_DEAD; 3964 } 3965 } 3966 3967 /* 3968 * For each input argument, add its input register to prefs. 3969 * If a temp is used once, this produces a single set bit; 3970 * if a temp is used multiple times, this produces a set. 
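 * For example, a temp passed in two different register argument slots
 * accumulates both of the corresponding tcg_target_call_iarg_regs[] entries.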
3971 */ 3972 for (i = 0; i < nb_iargs; i++) { 3973 const TCGCallArgumentLoc *loc = &info->in[i]; 3974 ts = arg_temp(op->args[nb_oargs + i]); 3975 3976 switch (loc->kind) { 3977 case TCG_CALL_ARG_NORMAL: 3978 case TCG_CALL_ARG_EXTEND_U: 3979 case TCG_CALL_ARG_EXTEND_S: 3980 if (arg_slot_reg_p(loc->arg_slot)) { 3981 tcg_regset_set_reg(*la_temp_pref(ts), 3982 tcg_target_call_iarg_regs[loc->arg_slot]); 3983 } 3984 break; 3985 default: 3986 break; 3987 } 3988 } 3989 } 3990 break; 3991 case INDEX_op_insn_start: 3992 break; 3993 case INDEX_op_discard: 3994 /* mark the temporary as dead */ 3995 ts = arg_temp(op->args[0]); 3996 ts->state = TS_DEAD; 3997 la_reset_pref(ts); 3998 break; 3999 4000 case INDEX_op_add2_i32: 4001 case INDEX_op_add2_i64: 4002 opc_new = INDEX_op_add; 4003 goto do_addsub2; 4004 case INDEX_op_sub2_i32: 4005 case INDEX_op_sub2_i64: 4006 opc_new = INDEX_op_sub; 4007 do_addsub2: 4008 nb_iargs = 4; 4009 nb_oargs = 2; 4010 /* Test if the high part of the operation is dead, but not 4011 the low part. The result can be optimized to a simple 4012 add or sub. This happens often for x86_64 guest when the 4013 cpu mode is set to 32 bit. */ 4014 if (arg_temp(op->args[1])->state == TS_DEAD) { 4015 if (arg_temp(op->args[0])->state == TS_DEAD) { 4016 goto do_remove; 4017 } 4018 /* Replace the opcode and adjust the args in place, 4019 leaving 3 unused args at the end. */ 4020 op->opc = opc = opc_new; 4021 op->args[1] = op->args[2]; 4022 op->args[2] = op->args[4]; 4023 /* Fall through and mark the single-word operation live. */ 4024 nb_iargs = 2; 4025 nb_oargs = 1; 4026 } 4027 goto do_not_remove; 4028 4029 case INDEX_op_muls2_i32: 4030 case INDEX_op_muls2_i64: 4031 opc_new = INDEX_op_mul; 4032 opc_new2 = INDEX_op_mulsh; 4033 goto do_mul2; 4034 case INDEX_op_mulu2_i32: 4035 case INDEX_op_mulu2_i64: 4036 opc_new = INDEX_op_mul; 4037 opc_new2 = INDEX_op_muluh; 4038 do_mul2: 4039 nb_iargs = 2; 4040 nb_oargs = 2; 4041 if (arg_temp(op->args[1])->state == TS_DEAD) { 4042 if (arg_temp(op->args[0])->state == TS_DEAD) { 4043 /* Both parts of the operation are dead. */ 4044 goto do_remove; 4045 } 4046 /* The high part of the operation is dead; generate the low. */ 4047 op->opc = opc = opc_new; 4048 op->args[1] = op->args[2]; 4049 op->args[2] = op->args[3]; 4050 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4051 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4052 /* The low part of the operation is dead; generate the high. */ 4053 op->opc = opc = opc_new2; 4054 op->args[0] = op->args[1]; 4055 op->args[1] = op->args[2]; 4056 op->args[2] = op->args[3]; 4057 } else { 4058 goto do_not_remove; 4059 } 4060 /* Mark the single-word operation live. */ 4061 nb_oargs = 1; 4062 goto do_not_remove; 4063 4064 default: 4065 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4066 nb_iargs = def->nb_iargs; 4067 nb_oargs = def->nb_oargs; 4068 4069 /* Test if the operation can be removed because all 4070 its outputs are dead. We assume that nb_oargs == 0 4071 implies side effects */ 4072 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4073 for (i = 0; i < nb_oargs; i++) { 4074 if (arg_temp(op->args[i])->state != TS_DEAD) { 4075 goto do_not_remove; 4076 } 4077 } 4078 goto do_remove; 4079 } 4080 goto do_not_remove; 4081 4082 do_remove: 4083 tcg_op_remove(s, op); 4084 break; 4085 4086 do_not_remove: 4087 for (i = 0; i < nb_oargs; i++) { 4088 ts = arg_temp(op->args[i]); 4089 4090 /* Remember the preference of the uses that followed. 
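   The walk over the op list is backwards, so at this point *la_temp_pref(ts)
   reflects the preferences of the uses that come later in the stream.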
*/ 4091 if (i < ARRAY_SIZE(op->output_pref)) { 4092 op->output_pref[i] = *la_temp_pref(ts); 4093 } 4094 4095 /* Output args are dead. */ 4096 if (ts->state & TS_DEAD) { 4097 arg_life |= DEAD_ARG << i; 4098 } 4099 if (ts->state & TS_MEM) { 4100 arg_life |= SYNC_ARG << i; 4101 } 4102 ts->state = TS_DEAD; 4103 la_reset_pref(ts); 4104 } 4105 4106 /* If end of basic block, update. */ 4107 if (def->flags & TCG_OPF_BB_EXIT) { 4108 la_func_end(s, nb_globals, nb_temps); 4109 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4110 la_bb_sync(s, nb_globals, nb_temps); 4111 } else if (def->flags & TCG_OPF_BB_END) { 4112 la_bb_end(s, nb_globals, nb_temps); 4113 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4114 la_global_sync(s, nb_globals); 4115 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4116 la_cross_call(s, nb_temps); 4117 } 4118 } 4119 4120 /* Record arguments that die in this opcode. */ 4121 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4122 ts = arg_temp(op->args[i]); 4123 if (ts->state & TS_DEAD) { 4124 arg_life |= DEAD_ARG << i; 4125 } 4126 } 4127 4128 /* Input arguments are live for preceding opcodes. */ 4129 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4130 ts = arg_temp(op->args[i]); 4131 if (ts->state & TS_DEAD) { 4132 /* For operands that were dead, initially allow 4133 all regs for the type. */ 4134 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4135 ts->state &= ~TS_DEAD; 4136 } 4137 } 4138 4139 /* Incorporate constraints for this operand. */ 4140 switch (opc) { 4141 case INDEX_op_mov: 4142 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4143 have proper constraints. That said, special case 4144 moves to propagate preferences backward. */ 4145 if (IS_DEAD_ARG(1)) { 4146 *la_temp_pref(arg_temp(op->args[0])) 4147 = *la_temp_pref(arg_temp(op->args[1])); 4148 } 4149 break; 4150 4151 default: 4152 args_ct = opcode_args_ct(op); 4153 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4154 const TCGArgConstraint *ct = &args_ct[i]; 4155 TCGRegSet set, *pset; 4156 4157 ts = arg_temp(op->args[i]); 4158 pset = la_temp_pref(ts); 4159 set = *pset; 4160 4161 set &= ct->regs; 4162 if (ct->ialias) { 4163 set &= output_pref(op, ct->alias_index); 4164 } 4165 /* If the combination is not possible, restart. */ 4166 if (set == 0) { 4167 set = ct->regs; 4168 } 4169 *pset = set; 4170 } 4171 break; 4172 } 4173 break; 4174 } 4175 op->life = arg_life; 4176 } 4177 } 4178 4179 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4180 static bool __attribute__((noinline)) 4181 liveness_pass_2(TCGContext *s) 4182 { 4183 int nb_globals = s->nb_globals; 4184 int nb_temps, i; 4185 bool changes = false; 4186 TCGOp *op, *op_next; 4187 4188 /* Create a temporary for each indirect global. */ 4189 for (i = 0; i < nb_globals; ++i) { 4190 TCGTemp *its = &s->temps[i]; 4191 if (its->indirect_reg) { 4192 TCGTemp *dts = tcg_temp_alloc(s); 4193 dts->type = its->type; 4194 dts->base_type = its->base_type; 4195 dts->temp_subindex = its->temp_subindex; 4196 dts->kind = TEMP_EBB; 4197 its->state_ptr = dts; 4198 } else { 4199 its->state_ptr = NULL; 4200 } 4201 /* All globals begin dead. 
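A global left in the TS_DEAD state is reloaded from its canonical memory slot before its first use, via the load op inserted below.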
*/ 4202 its->state = TS_DEAD; 4203 } 4204 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4205 TCGTemp *its = &s->temps[i]; 4206 its->state_ptr = NULL; 4207 its->state = TS_DEAD; 4208 } 4209 4210 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4211 TCGOpcode opc = op->opc; 4212 const TCGOpDef *def = &tcg_op_defs[opc]; 4213 TCGLifeData arg_life = op->life; 4214 int nb_iargs, nb_oargs, call_flags; 4215 TCGTemp *arg_ts, *dir_ts; 4216 4217 if (opc == INDEX_op_call) { 4218 nb_oargs = TCGOP_CALLO(op); 4219 nb_iargs = TCGOP_CALLI(op); 4220 call_flags = tcg_call_flags(op); 4221 } else { 4222 nb_iargs = def->nb_iargs; 4223 nb_oargs = def->nb_oargs; 4224 4225 /* Set flags similar to how calls require. */ 4226 if (def->flags & TCG_OPF_COND_BRANCH) { 4227 /* Like reading globals: sync_globals */ 4228 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4229 } else if (def->flags & TCG_OPF_BB_END) { 4230 /* Like writing globals: save_globals */ 4231 call_flags = 0; 4232 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4233 /* Like reading globals: sync_globals */ 4234 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4235 } else { 4236 /* No effect on globals. */ 4237 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4238 TCG_CALL_NO_WRITE_GLOBALS); 4239 } 4240 } 4241 4242 /* Make sure that input arguments are available. */ 4243 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4244 arg_ts = arg_temp(op->args[i]); 4245 dir_ts = arg_ts->state_ptr; 4246 if (dir_ts && arg_ts->state == TS_DEAD) { 4247 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4248 ? INDEX_op_ld_i32 4249 : INDEX_op_ld_i64); 4250 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4251 arg_ts->type, 3); 4252 4253 lop->args[0] = temp_arg(dir_ts); 4254 lop->args[1] = temp_arg(arg_ts->mem_base); 4255 lop->args[2] = arg_ts->mem_offset; 4256 4257 /* Loaded, but synced with memory. */ 4258 arg_ts->state = TS_MEM; 4259 } 4260 } 4261 4262 /* Perform input replacement, and mark inputs that became dead. 4263 No action is required except keeping temp_state up to date 4264 so that we reload when needed. */ 4265 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4266 arg_ts = arg_temp(op->args[i]); 4267 dir_ts = arg_ts->state_ptr; 4268 if (dir_ts) { 4269 op->args[i] = temp_arg(dir_ts); 4270 changes = true; 4271 if (IS_DEAD_ARG(i)) { 4272 arg_ts->state = TS_DEAD; 4273 } 4274 } 4275 } 4276 4277 /* Liveness analysis should ensure that the following are 4278 all correct, for call sites and basic block end points. */ 4279 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4280 /* Nothing to do */ 4281 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4282 for (i = 0; i < nb_globals; ++i) { 4283 /* Liveness should see that globals are synced back, 4284 that is, either TS_DEAD or TS_MEM. */ 4285 arg_ts = &s->temps[i]; 4286 tcg_debug_assert(arg_ts->state_ptr == 0 4287 || arg_ts->state != 0); 4288 } 4289 } else { 4290 for (i = 0; i < nb_globals; ++i) { 4291 /* Liveness should see that globals are saved back, 4292 that is, TS_DEAD, waiting to be reloaded. */ 4293 arg_ts = &s->temps[i]; 4294 tcg_debug_assert(arg_ts->state_ptr == 0 4295 || arg_ts->state == TS_DEAD); 4296 } 4297 } 4298 4299 /* Outputs become available. */ 4300 if (opc == INDEX_op_mov) { 4301 arg_ts = arg_temp(op->args[0]); 4302 dir_ts = arg_ts->state_ptr; 4303 if (dir_ts) { 4304 op->args[0] = temp_arg(dir_ts); 4305 changes = true; 4306 4307 /* The output is now live and modified. */ 4308 arg_ts->state = 0; 4309 4310 if (NEED_SYNC_ARG(0)) { 4311 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4312 ? 
INDEX_op_st_i32 4313 : INDEX_op_st_i64); 4314 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4315 arg_ts->type, 3); 4316 TCGTemp *out_ts = dir_ts; 4317 4318 if (IS_DEAD_ARG(0)) { 4319 out_ts = arg_temp(op->args[1]); 4320 arg_ts->state = TS_DEAD; 4321 tcg_op_remove(s, op); 4322 } else { 4323 arg_ts->state = TS_MEM; 4324 } 4325 4326 sop->args[0] = temp_arg(out_ts); 4327 sop->args[1] = temp_arg(arg_ts->mem_base); 4328 sop->args[2] = arg_ts->mem_offset; 4329 } else { 4330 tcg_debug_assert(!IS_DEAD_ARG(0)); 4331 } 4332 } 4333 } else { 4334 for (i = 0; i < nb_oargs; i++) { 4335 arg_ts = arg_temp(op->args[i]); 4336 dir_ts = arg_ts->state_ptr; 4337 if (!dir_ts) { 4338 continue; 4339 } 4340 op->args[i] = temp_arg(dir_ts); 4341 changes = true; 4342 4343 /* The output is now live and modified. */ 4344 arg_ts->state = 0; 4345 4346 /* Sync outputs upon their last write. */ 4347 if (NEED_SYNC_ARG(i)) { 4348 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4349 ? INDEX_op_st_i32 4350 : INDEX_op_st_i64); 4351 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4352 arg_ts->type, 3); 4353 4354 sop->args[0] = temp_arg(dir_ts); 4355 sop->args[1] = temp_arg(arg_ts->mem_base); 4356 sop->args[2] = arg_ts->mem_offset; 4357 4358 arg_ts->state = TS_MEM; 4359 } 4360 /* Drop outputs that are dead. */ 4361 if (IS_DEAD_ARG(i)) { 4362 arg_ts->state = TS_DEAD; 4363 } 4364 } 4365 } 4366 } 4367 4368 return changes; 4369 } 4370 4371 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4372 { 4373 intptr_t off; 4374 int size, align; 4375 4376 /* When allocating an object, look at the full type. */ 4377 size = tcg_type_size(ts->base_type); 4378 switch (ts->base_type) { 4379 case TCG_TYPE_I32: 4380 align = 4; 4381 break; 4382 case TCG_TYPE_I64: 4383 case TCG_TYPE_V64: 4384 align = 8; 4385 break; 4386 case TCG_TYPE_I128: 4387 case TCG_TYPE_V128: 4388 case TCG_TYPE_V256: 4389 /* 4390 * Note that we do not require aligned storage for V256, 4391 * and that we provide alignment for I128 to match V128, 4392 * even if that's above what the host ABI requires. 4393 */ 4394 align = 16; 4395 break; 4396 default: 4397 g_assert_not_reached(); 4398 } 4399 4400 /* 4401 * Assume the stack is sufficiently aligned. 4402 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4403 * and do not require 16 byte vector alignment. This seems slightly 4404 * easier than fully parameterizing the above switch statement. 4405 */ 4406 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4407 off = ROUND_UP(s->current_frame_offset, align); 4408 4409 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4410 if (off + size > s->frame_end) { 4411 tcg_raise_tb_overflow(s); 4412 } 4413 s->current_frame_offset = off + size; 4414 #if defined(__sparc__) 4415 off += TCG_TARGET_STACK_BIAS; 4416 #endif 4417 4418 /* If the object was subdivided, assign memory to all the parts. */ 4419 if (ts->base_type != ts->type) { 4420 int part_size = tcg_type_size(ts->type); 4421 int part_count = size / part_size; 4422 4423 /* 4424 * Each part is allocated sequentially in tcg_temp_new_internal. 4425 * Jump back to the first part by subtracting the current index. 4426 */ 4427 ts -= ts->temp_subindex; 4428 for (int i = 0; i < part_count; ++i) { 4429 ts[i].mem_offset = off + i * part_size; 4430 ts[i].mem_base = s->frame_temp; 4431 ts[i].mem_allocated = 1; 4432 } 4433 } else { 4434 ts->mem_offset = off; 4435 ts->mem_base = s->frame_temp; 4436 ts->mem_allocated = 1; 4437 } 4438 } 4439 4440 /* Assign @reg to @ts, and update reg_to_temp[]. 
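Any previous assignment for this temp is released first, so the mapping stays one-to-one; e.g. moving a temp from register A to register B leaves reg_to_temp[A] NULL and reg_to_temp[B] pointing at the temp.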
*/ 4441 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4442 { 4443 if (ts->val_type == TEMP_VAL_REG) { 4444 TCGReg old = ts->reg; 4445 tcg_debug_assert(s->reg_to_temp[old] == ts); 4446 if (old == reg) { 4447 return; 4448 } 4449 s->reg_to_temp[old] = NULL; 4450 } 4451 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4452 s->reg_to_temp[reg] = ts; 4453 ts->val_type = TEMP_VAL_REG; 4454 ts->reg = reg; 4455 } 4456 4457 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4458 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4459 { 4460 tcg_debug_assert(type != TEMP_VAL_REG); 4461 if (ts->val_type == TEMP_VAL_REG) { 4462 TCGReg reg = ts->reg; 4463 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4464 s->reg_to_temp[reg] = NULL; 4465 } 4466 ts->val_type = type; 4467 } 4468 4469 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4470 4471 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4472 mark it free; otherwise mark it dead. */ 4473 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4474 { 4475 TCGTempVal new_type; 4476 4477 switch (ts->kind) { 4478 case TEMP_FIXED: 4479 return; 4480 case TEMP_GLOBAL: 4481 case TEMP_TB: 4482 new_type = TEMP_VAL_MEM; 4483 break; 4484 case TEMP_EBB: 4485 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4486 break; 4487 case TEMP_CONST: 4488 new_type = TEMP_VAL_CONST; 4489 break; 4490 default: 4491 g_assert_not_reached(); 4492 } 4493 set_temp_val_nonreg(s, ts, new_type); 4494 } 4495 4496 /* Mark a temporary as dead. */ 4497 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4498 { 4499 temp_free_or_dead(s, ts, 1); 4500 } 4501 4502 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4503 registers needs to be allocated to store a constant. If 'free_or_dead' 4504 is non-zero, subsequently release the temporary; if it is positive, the 4505 temp is dead; if it is negative, the temp is free. */ 4506 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4507 TCGRegSet preferred_regs, int free_or_dead) 4508 { 4509 if (!temp_readonly(ts) && !ts->mem_coherent) { 4510 if (!ts->mem_allocated) { 4511 temp_allocate_frame(s, ts); 4512 } 4513 switch (ts->val_type) { 4514 case TEMP_VAL_CONST: 4515 /* If we're going to free the temp immediately, then we won't 4516 require it later in a register, so attempt to store the 4517 constant to memory directly. */ 4518 if (free_or_dead 4519 && tcg_out_sti(s, ts->type, ts->val, 4520 ts->mem_base->reg, ts->mem_offset)) { 4521 break; 4522 } 4523 temp_load(s, ts, tcg_target_available_regs[ts->type], 4524 allocated_regs, preferred_regs); 4525 /* fallthrough */ 4526 4527 case TEMP_VAL_REG: 4528 tcg_out_st(s, ts->type, ts->reg, 4529 ts->mem_base->reg, ts->mem_offset); 4530 break; 4531 4532 case TEMP_VAL_MEM: 4533 break; 4534 4535 case TEMP_VAL_DEAD: 4536 default: 4537 g_assert_not_reached(); 4538 } 4539 ts->mem_coherent = 1; 4540 } 4541 if (free_or_dead) { 4542 temp_free_or_dead(s, ts, free_or_dead); 4543 } 4544 } 4545 4546 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4547 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4548 { 4549 TCGTemp *ts = s->reg_to_temp[reg]; 4550 if (ts != NULL) { 4551 temp_sync(s, ts, allocated_regs, 0, -1); 4552 } 4553 } 4554 4555 /** 4556 * tcg_reg_alloc: 4557 * @required_regs: Set of registers in which we must allocate. 
4558 * @allocated_regs: Set of registers which must be avoided. 4559 * @preferred_regs: Set of registers we should prefer. 4560 * @rev: True if we search the registers in "indirect" order. 4561 * 4562 * The allocated register must be in @required_regs & ~@allocated_regs, 4563 * but if we can put it in @preferred_regs we may save a move later. 4564 */ 4565 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4566 TCGRegSet allocated_regs, 4567 TCGRegSet preferred_regs, bool rev) 4568 { 4569 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4570 TCGRegSet reg_ct[2]; 4571 const int *order; 4572 4573 reg_ct[1] = required_regs & ~allocated_regs; 4574 tcg_debug_assert(reg_ct[1] != 0); 4575 reg_ct[0] = reg_ct[1] & preferred_regs; 4576 4577 /* Skip the preferred_regs option if it cannot be satisfied, 4578 or if the preference made no difference. */ 4579 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4580 4581 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4582 4583 /* Try free registers, preferences first. */ 4584 for (j = f; j < 2; j++) { 4585 TCGRegSet set = reg_ct[j]; 4586 4587 if (tcg_regset_single(set)) { 4588 /* One register in the set. */ 4589 TCGReg reg = tcg_regset_first(set); 4590 if (s->reg_to_temp[reg] == NULL) { 4591 return reg; 4592 } 4593 } else { 4594 for (i = 0; i < n; i++) { 4595 TCGReg reg = order[i]; 4596 if (s->reg_to_temp[reg] == NULL && 4597 tcg_regset_test_reg(set, reg)) { 4598 return reg; 4599 } 4600 } 4601 } 4602 } 4603 4604 /* We must spill something. */ 4605 for (j = f; j < 2; j++) { 4606 TCGRegSet set = reg_ct[j]; 4607 4608 if (tcg_regset_single(set)) { 4609 /* One register in the set. */ 4610 TCGReg reg = tcg_regset_first(set); 4611 tcg_reg_free(s, reg, allocated_regs); 4612 return reg; 4613 } else { 4614 for (i = 0; i < n; i++) { 4615 TCGReg reg = order[i]; 4616 if (tcg_regset_test_reg(set, reg)) { 4617 tcg_reg_free(s, reg, allocated_regs); 4618 return reg; 4619 } 4620 } 4621 } 4622 } 4623 4624 g_assert_not_reached(); 4625 } 4626 4627 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4628 TCGRegSet allocated_regs, 4629 TCGRegSet preferred_regs, bool rev) 4630 { 4631 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4632 TCGRegSet reg_ct[2]; 4633 const int *order; 4634 4635 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4636 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4637 tcg_debug_assert(reg_ct[1] != 0); 4638 reg_ct[0] = reg_ct[1] & preferred_regs; 4639 4640 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4641 4642 /* 4643 * Skip the preferred_regs option if it cannot be satisfied, 4644 * or if the preference made no difference. 4645 */ 4646 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4647 4648 /* 4649 * Minimize the number of flushes by looking for 2 free registers first, 4650 * then a single flush, then two flushes. 4651 */ 4652 for (fmin = 2; fmin >= 0; fmin--) { 4653 for (j = k; j < 2; j++) { 4654 TCGRegSet set = reg_ct[j]; 4655 4656 for (i = 0; i < n; i++) { 4657 TCGReg reg = order[i]; 4658 4659 if (tcg_regset_test_reg(set, reg)) { 4660 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4661 if (f >= fmin) { 4662 tcg_reg_free(s, reg, allocated_regs); 4663 tcg_reg_free(s, reg + 1, allocated_regs); 4664 return reg; 4665 } 4666 } 4667 } 4668 } 4669 } 4670 g_assert_not_reached(); 4671 } 4672 4673 /* Make sure the temporary is in a register. 
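A constant is materialized with movi or dupi_vec, and a value that only lives in memory is reloaded from its frame slot.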
If needed, allocate the register 4674 from DESIRED while avoiding ALLOCATED. */ 4675 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4676 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4677 { 4678 TCGReg reg; 4679 4680 switch (ts->val_type) { 4681 case TEMP_VAL_REG: 4682 return; 4683 case TEMP_VAL_CONST: 4684 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4685 preferred_regs, ts->indirect_base); 4686 if (ts->type <= TCG_TYPE_I64) { 4687 tcg_out_movi(s, ts->type, reg, ts->val); 4688 } else { 4689 uint64_t val = ts->val; 4690 MemOp vece = MO_64; 4691 4692 /* 4693 * Find the minimal vector element that matches the constant. 4694 * The targets will, in general, have to do this search anyway, 4695 * so do this generically. 4696 */ 4697 if (val == dup_const(MO_8, val)) { 4698 vece = MO_8; 4699 } else if (val == dup_const(MO_16, val)) { 4700 vece = MO_16; 4701 } else if (val == dup_const(MO_32, val)) { 4702 vece = MO_32; 4703 } 4704 4705 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4706 } 4707 ts->mem_coherent = 0; 4708 break; 4709 case TEMP_VAL_MEM: 4710 if (!ts->mem_allocated) { 4711 temp_allocate_frame(s, ts); 4712 } 4713 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4714 preferred_regs, ts->indirect_base); 4715 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4716 ts->mem_coherent = 1; 4717 break; 4718 case TEMP_VAL_DEAD: 4719 default: 4720 g_assert_not_reached(); 4721 } 4722 set_temp_val_reg(s, ts, reg); 4723 } 4724 4725 /* Save a temporary to memory. 'allocated_regs' is used in case a 4726 temporary register needs to be allocated to store a constant. */ 4727 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4728 { 4729 /* The liveness analysis already ensures that globals are back 4730 in memory. Keep a tcg_debug_assert for safety. */ 4731 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4732 } 4733 4734 /* save globals to their canonical location and assume they can be 4735 modified by the following code. 'allocated_regs' is used in case a 4736 temporary register needs to be allocated to store a constant. */ 4737 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4738 { 4739 int i, n; 4740 4741 for (i = 0, n = s->nb_globals; i < n; i++) { 4742 temp_save(s, &s->temps[i], allocated_regs); 4743 } 4744 } 4745 4746 /* sync globals to their canonical location and assume they can be 4747 read by the following code. 'allocated_regs' is used in case a 4748 temporary register needs to be allocated to store a constant. */ 4749 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4750 { 4751 int i, n; 4752 4753 for (i = 0, n = s->nb_globals; i < n; i++) { 4754 TCGTemp *ts = &s->temps[i]; 4755 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4756 || ts->kind == TEMP_FIXED 4757 || ts->mem_coherent); 4758 } 4759 } 4760 4761 /* at the end of a basic block, we assume all temporaries are dead and 4762 all globals are stored at their canonical location. */ 4763 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4764 { 4765 int i; 4766 4767 for (i = s->nb_globals; i < s->nb_temps; i++) { 4768 TCGTemp *ts = &s->temps[i]; 4769 4770 switch (ts->kind) { 4771 case TEMP_TB: 4772 temp_save(s, ts, allocated_regs); 4773 break; 4774 case TEMP_EBB: 4775 /* The liveness analysis already ensures that temps are dead. 4776 Keep a tcg_debug_assert for safety.
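(TEMP_EBB temps never survive past the end of an extended basic block, so there is nothing to spill for them here.)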
*/ 4777 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4778 break; 4779 case TEMP_CONST: 4780 /* Similarly, we should have freed any allocated register. */ 4781 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4782 break; 4783 default: 4784 g_assert_not_reached(); 4785 } 4786 } 4787 4788 save_globals(s, allocated_regs); 4789 } 4790 4791 /* 4792 * At a conditional branch, we assume all temporaries are dead unless 4793 * explicitly live-across-conditional-branch; all globals and local 4794 * temps are synced to their location. 4795 */ 4796 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4797 { 4798 sync_globals(s, allocated_regs); 4799 4800 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4801 TCGTemp *ts = &s->temps[i]; 4802 /* 4803 * The liveness analysis already ensures that temps are dead. 4804 * Keep tcg_debug_asserts for safety. 4805 */ 4806 switch (ts->kind) { 4807 case TEMP_TB: 4808 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4809 break; 4810 case TEMP_EBB: 4811 case TEMP_CONST: 4812 break; 4813 default: 4814 g_assert_not_reached(); 4815 } 4816 } 4817 } 4818 4819 /* 4820 * Specialized code generation for INDEX_op_mov_* with a constant. 4821 */ 4822 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4823 tcg_target_ulong val, TCGLifeData arg_life, 4824 TCGRegSet preferred_regs) 4825 { 4826 /* ENV should not be modified. */ 4827 tcg_debug_assert(!temp_readonly(ots)); 4828 4829 /* The movi is not explicitly generated here. */ 4830 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4831 ots->val = val; 4832 ots->mem_coherent = 0; 4833 if (NEED_SYNC_ARG(0)) { 4834 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4835 } else if (IS_DEAD_ARG(0)) { 4836 temp_dead(s, ots); 4837 } 4838 } 4839 4840 /* 4841 * Specialized code generation for INDEX_op_mov_*. 4842 */ 4843 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4844 { 4845 const TCGLifeData arg_life = op->life; 4846 TCGRegSet allocated_regs, preferred_regs; 4847 TCGTemp *ts, *ots; 4848 TCGType otype, itype; 4849 TCGReg oreg, ireg; 4850 4851 allocated_regs = s->reserved_regs; 4852 preferred_regs = output_pref(op, 0); 4853 ots = arg_temp(op->args[0]); 4854 ts = arg_temp(op->args[1]); 4855 4856 /* ENV should not be modified. */ 4857 tcg_debug_assert(!temp_readonly(ots)); 4858 4859 /* Note that otype != itype for no-op truncation. */ 4860 otype = ots->type; 4861 itype = ts->type; 4862 4863 if (ts->val_type == TEMP_VAL_CONST) { 4864 /* propagate constant or generate sti */ 4865 tcg_target_ulong val = ts->val; 4866 if (IS_DEAD_ARG(1)) { 4867 temp_dead(s, ts); 4868 } 4869 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4870 return; 4871 } 4872 4873 /* If the source value is in memory we're going to be forced 4874 to have it in a register in order to perform the copy. Copy 4875 the SOURCE value into its own register first, that way we 4876 don't have to reload SOURCE the next time it is used. */ 4877 if (ts->val_type == TEMP_VAL_MEM) { 4878 temp_load(s, ts, tcg_target_available_regs[itype], 4879 allocated_regs, preferred_regs); 4880 } 4881 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4882 ireg = ts->reg; 4883 4884 if (IS_DEAD_ARG(0)) { 4885 /* mov to a non-saved dead register makes no sense (even with 4886 liveness analysis disabled). 
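The only reason to get here is that the output must be synced to memory, so store directly from the input register into the output's slot and mark the output dead.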
*/ 4887 tcg_debug_assert(NEED_SYNC_ARG(0)); 4888 if (!ots->mem_allocated) { 4889 temp_allocate_frame(s, ots); 4890 } 4891 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4892 if (IS_DEAD_ARG(1)) { 4893 temp_dead(s, ts); 4894 } 4895 temp_dead(s, ots); 4896 return; 4897 } 4898 4899 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4900 /* 4901 * The mov can be suppressed. Kill input first, so that it 4902 * is unlinked from reg_to_temp, then set the output to the 4903 * reg that we saved from the input. 4904 */ 4905 temp_dead(s, ts); 4906 oreg = ireg; 4907 } else { 4908 if (ots->val_type == TEMP_VAL_REG) { 4909 oreg = ots->reg; 4910 } else { 4911 /* Make sure to not spill the input register during allocation. */ 4912 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4913 allocated_regs | ((TCGRegSet)1 << ireg), 4914 preferred_regs, ots->indirect_base); 4915 } 4916 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4917 /* 4918 * Cross register class move not supported. 4919 * Store the source register into the destination slot 4920 * and leave the destination temp as TEMP_VAL_MEM. 4921 */ 4922 assert(!temp_readonly(ots)); 4923 if (!ts->mem_allocated) { 4924 temp_allocate_frame(s, ots); 4925 } 4926 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4927 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4928 ots->mem_coherent = 1; 4929 return; 4930 } 4931 } 4932 set_temp_val_reg(s, ots, oreg); 4933 ots->mem_coherent = 0; 4934 4935 if (NEED_SYNC_ARG(0)) { 4936 temp_sync(s, ots, allocated_regs, 0, 0); 4937 } 4938 } 4939 4940 /* 4941 * Specialized code generation for INDEX_op_dup_vec. 4942 */ 4943 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4944 { 4945 const TCGLifeData arg_life = op->life; 4946 TCGRegSet dup_out_regs, dup_in_regs; 4947 const TCGArgConstraint *dup_args_ct; 4948 TCGTemp *its, *ots; 4949 TCGType itype, vtype; 4950 unsigned vece; 4951 int lowpart_ofs; 4952 bool ok; 4953 4954 ots = arg_temp(op->args[0]); 4955 its = arg_temp(op->args[1]); 4956 4957 /* ENV should not be modified. */ 4958 tcg_debug_assert(!temp_readonly(ots)); 4959 4960 itype = its->type; 4961 vece = TCGOP_VECE(op); 4962 vtype = TCGOP_TYPE(op); 4963 4964 if (its->val_type == TEMP_VAL_CONST) { 4965 /* Propagate constant via movi -> dupi. */ 4966 tcg_target_ulong val = its->val; 4967 if (IS_DEAD_ARG(1)) { 4968 temp_dead(s, its); 4969 } 4970 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4971 return; 4972 } 4973 4974 dup_args_ct = opcode_args_ct(op); 4975 dup_out_regs = dup_args_ct[0].regs; 4976 dup_in_regs = dup_args_ct[1].regs; 4977 4978 /* Allocate the output register now. */ 4979 if (ots->val_type != TEMP_VAL_REG) { 4980 TCGRegSet allocated_regs = s->reserved_regs; 4981 TCGReg oreg; 4982 4983 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4984 /* Make sure to not spill the input register. */ 4985 tcg_regset_set_reg(allocated_regs, its->reg); 4986 } 4987 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4988 output_pref(op, 0), ots->indirect_base); 4989 set_temp_val_reg(s, ots, oreg); 4990 } 4991 4992 switch (its->val_type) { 4993 case TEMP_VAL_REG: 4994 /* 4995 * The dup constraints must be broad, covering all possible VECE. 4996 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 4997 * to fail, indicating that extra moves are required for that case.
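* If the register-to-register dup fails, we retry from memory with dupm, and failing that we load the value into the output register and dup it in place, which must then succeed.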
4998 */ 4999 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 5000 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 5001 goto done; 5002 } 5003 /* Try again from memory or a vector input register. */ 5004 } 5005 if (!its->mem_coherent) { 5006 /* 5007 * The input register is not synced, and so an extra store 5008 * would be required to use memory. Attempt an integer-vector 5009 * register move first. We do not have a TCGRegSet for this. 5010 */ 5011 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 5012 break; 5013 } 5014 /* Sync the temp back to its slot and load from there. */ 5015 temp_sync(s, its, s->reserved_regs, 0, 0); 5016 } 5017 /* fall through */ 5018 5019 case TEMP_VAL_MEM: 5020 lowpart_ofs = 0; 5021 if (HOST_BIG_ENDIAN) { 5022 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5023 } 5024 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5025 its->mem_offset + lowpart_ofs)) { 5026 goto done; 5027 } 5028 /* Load the input into the destination vector register. */ 5029 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5030 break; 5031 5032 default: 5033 g_assert_not_reached(); 5034 } 5035 5036 /* We now have a vector input register, so dup must succeed. */ 5037 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5038 tcg_debug_assert(ok); 5039 5040 done: 5041 ots->mem_coherent = 0; 5042 if (IS_DEAD_ARG(1)) { 5043 temp_dead(s, its); 5044 } 5045 if (NEED_SYNC_ARG(0)) { 5046 temp_sync(s, ots, s->reserved_regs, 0, 0); 5047 } 5048 if (IS_DEAD_ARG(0)) { 5049 temp_dead(s, ots); 5050 } 5051 } 5052 5053 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5054 { 5055 const TCGLifeData arg_life = op->life; 5056 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5057 TCGRegSet i_allocated_regs; 5058 TCGRegSet o_allocated_regs; 5059 int i, k, nb_iargs, nb_oargs; 5060 TCGReg reg; 5061 TCGArg arg; 5062 const TCGArgConstraint *args_ct; 5063 const TCGArgConstraint *arg_ct; 5064 TCGTemp *ts; 5065 TCGArg new_args[TCG_MAX_OP_ARGS]; 5066 int const_args[TCG_MAX_OP_ARGS]; 5067 TCGCond op_cond; 5068 5069 nb_oargs = def->nb_oargs; 5070 nb_iargs = def->nb_iargs; 5071 5072 /* copy constants */ 5073 memcpy(new_args + nb_oargs + nb_iargs, 5074 op->args + nb_oargs + nb_iargs, 5075 sizeof(TCGArg) * def->nb_cargs); 5076 5077 i_allocated_regs = s->reserved_regs; 5078 o_allocated_regs = s->reserved_regs; 5079 5080 switch (op->opc) { 5081 case INDEX_op_brcond_i32: 5082 case INDEX_op_brcond_i64: 5083 op_cond = op->args[2]; 5084 break; 5085 case INDEX_op_setcond_i32: 5086 case INDEX_op_setcond_i64: 5087 case INDEX_op_negsetcond_i32: 5088 case INDEX_op_negsetcond_i64: 5089 case INDEX_op_cmp_vec: 5090 op_cond = op->args[3]; 5091 break; 5092 case INDEX_op_brcond2_i32: 5093 op_cond = op->args[4]; 5094 break; 5095 case INDEX_op_movcond_i32: 5096 case INDEX_op_movcond_i64: 5097 case INDEX_op_setcond2_i32: 5098 case INDEX_op_cmpsel_vec: 5099 op_cond = op->args[5]; 5100 break; 5101 default: 5102 /* No condition within opcode. 
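Using TCG_COND_ALWAYS simply gives tcg_target_const_match a well-defined value for opcodes that carry no condition.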
*/ 5103 op_cond = TCG_COND_ALWAYS; 5104 break; 5105 } 5106 5107 args_ct = opcode_args_ct(op); 5108 5109 /* satisfy input constraints */ 5110 for (k = 0; k < nb_iargs; k++) { 5111 TCGRegSet i_preferred_regs, i_required_regs; 5112 bool allocate_new_reg, copyto_new_reg; 5113 TCGTemp *ts2; 5114 int i1, i2; 5115 5116 i = args_ct[nb_oargs + k].sort_index; 5117 arg = op->args[i]; 5118 arg_ct = &args_ct[i]; 5119 ts = arg_temp(arg); 5120 5121 if (ts->val_type == TEMP_VAL_CONST) { 5122 #ifdef TCG_REG_ZERO 5123 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5124 /* Hardware zero register: indicate register via non-const. */ 5125 const_args[i] = 0; 5126 new_args[i] = TCG_REG_ZERO; 5127 continue; 5128 } 5129 #endif 5130 5131 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5132 op_cond, TCGOP_VECE(op))) { 5133 /* constant is OK for instruction */ 5134 const_args[i] = 1; 5135 new_args[i] = ts->val; 5136 continue; 5137 } 5138 } 5139 5140 reg = ts->reg; 5141 i_preferred_regs = 0; 5142 i_required_regs = arg_ct->regs; 5143 allocate_new_reg = false; 5144 copyto_new_reg = false; 5145 5146 switch (arg_ct->pair) { 5147 case 0: /* not paired */ 5148 if (arg_ct->ialias) { 5149 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5150 5151 /* 5152 * If the input is readonly, then it cannot also be an 5153 * output and aliased to itself. If the input is not 5154 * dead after the instruction, we must allocate a new 5155 * register and move it. 5156 */ 5157 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5158 || args_ct[arg_ct->alias_index].newreg) { 5159 allocate_new_reg = true; 5160 } else if (ts->val_type == TEMP_VAL_REG) { 5161 /* 5162 * Check if the current register has already been 5163 * allocated for another input. 5164 */ 5165 allocate_new_reg = 5166 tcg_regset_test_reg(i_allocated_regs, reg); 5167 } 5168 } 5169 if (!allocate_new_reg) { 5170 temp_load(s, ts, i_required_regs, i_allocated_regs, 5171 i_preferred_regs); 5172 reg = ts->reg; 5173 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5174 } 5175 if (allocate_new_reg) { 5176 /* 5177 * Allocate a new register matching the constraint 5178 * and move the temporary register into it. 5179 */ 5180 temp_load(s, ts, tcg_target_available_regs[ts->type], 5181 i_allocated_regs, 0); 5182 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5183 i_preferred_regs, ts->indirect_base); 5184 copyto_new_reg = true; 5185 } 5186 break; 5187 5188 case 1: 5189 /* First of an input pair; if i1 == i2, the second is an output. */ 5190 i1 = i; 5191 i2 = arg_ct->pair_index; 5192 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5193 5194 /* 5195 * It is easier to default to allocating a new pair 5196 * and to identify a few cases where it's not required. 5197 */ 5198 if (arg_ct->ialias) { 5199 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5200 if (IS_DEAD_ARG(i1) && 5201 IS_DEAD_ARG(i2) && 5202 !temp_readonly(ts) && 5203 ts->val_type == TEMP_VAL_REG && 5204 ts->reg < TCG_TARGET_NB_REGS - 1 && 5205 tcg_regset_test_reg(i_required_regs, reg) && 5206 !tcg_regset_test_reg(i_allocated_regs, reg) && 5207 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5208 (ts2 5209 ? ts2->val_type == TEMP_VAL_REG && 5210 ts2->reg == reg + 1 && 5211 !temp_readonly(ts2) 5212 : s->reg_to_temp[reg + 1] == NULL)) { 5213 break; 5214 } 5215 } else { 5216 /* Without aliasing, the pair must also be an input. 
*/ 5217 tcg_debug_assert(ts2); 5218 if (ts->val_type == TEMP_VAL_REG && 5219 ts2->val_type == TEMP_VAL_REG && 5220 ts2->reg == reg + 1 && 5221 tcg_regset_test_reg(i_required_regs, reg)) { 5222 break; 5223 } 5224 } 5225 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5226 0, ts->indirect_base); 5227 goto do_pair; 5228 5229 case 2: /* pair second */ 5230 reg = new_args[arg_ct->pair_index] + 1; 5231 goto do_pair; 5232 5233 case 3: /* ialias with second output, no first input */ 5234 tcg_debug_assert(arg_ct->ialias); 5235 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5236 5237 if (IS_DEAD_ARG(i) && 5238 !temp_readonly(ts) && 5239 ts->val_type == TEMP_VAL_REG && 5240 reg > 0 && 5241 s->reg_to_temp[reg - 1] == NULL && 5242 tcg_regset_test_reg(i_required_regs, reg) && 5243 !tcg_regset_test_reg(i_allocated_regs, reg) && 5244 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5245 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5246 break; 5247 } 5248 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5249 i_allocated_regs, 0, 5250 ts->indirect_base); 5251 tcg_regset_set_reg(i_allocated_regs, reg); 5252 reg += 1; 5253 goto do_pair; 5254 5255 do_pair: 5256 /* 5257 * If an aliased input is not dead after the instruction, 5258 * we must allocate a new register and move it. 5259 */ 5260 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5261 TCGRegSet t_allocated_regs = i_allocated_regs; 5262 5263 /* 5264 * Because of the alias, and the continued life, make sure 5265 * that the temp is somewhere *other* than the reg pair, 5266 * and we get a copy in reg. 5267 */ 5268 tcg_regset_set_reg(t_allocated_regs, reg); 5269 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5270 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5271 /* If ts was already in reg, copy it somewhere else. */ 5272 TCGReg nr; 5273 bool ok; 5274 5275 tcg_debug_assert(ts->kind != TEMP_FIXED); 5276 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5277 t_allocated_regs, 0, ts->indirect_base); 5278 ok = tcg_out_mov(s, ts->type, nr, reg); 5279 tcg_debug_assert(ok); 5280 5281 set_temp_val_reg(s, ts, nr); 5282 } else { 5283 temp_load(s, ts, tcg_target_available_regs[ts->type], 5284 t_allocated_regs, 0); 5285 copyto_new_reg = true; 5286 } 5287 } else { 5288 /* Preferably allocate to reg, otherwise copy. */ 5289 i_required_regs = (TCGRegSet)1 << reg; 5290 temp_load(s, ts, i_required_regs, i_allocated_regs, 5291 i_preferred_regs); 5292 copyto_new_reg = ts->reg != reg; 5293 } 5294 break; 5295 5296 default: 5297 g_assert_not_reached(); 5298 } 5299 5300 if (copyto_new_reg) { 5301 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5302 /* 5303 * Cross register class move not supported. Sync the 5304 * temp back to its slot and load from there. 5305 */ 5306 temp_sync(s, ts, i_allocated_regs, 0, 0); 5307 tcg_out_ld(s, ts->type, reg, 5308 ts->mem_base->reg, ts->mem_offset); 5309 } 5310 } 5311 new_args[i] = reg; 5312 const_args[i] = 0; 5313 tcg_regset_set_reg(i_allocated_regs, reg); 5314 } 5315 5316 /* mark dead temporaries and free the associated registers */ 5317 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5318 if (IS_DEAD_ARG(i)) { 5319 temp_dead(s, arg_temp(op->args[i])); 5320 } 5321 } 5322 5323 if (def->flags & TCG_OPF_COND_BRANCH) { 5324 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5325 } else if (def->flags & TCG_OPF_BB_END) { 5326 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5327 } else { 5328 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5329 /* XXX: permit generic clobber register list ? 
*/ 5330 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5331 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5332 tcg_reg_free(s, i, i_allocated_regs); 5333 } 5334 } 5335 } 5336 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5337 /* sync globals if the op has side effects and might trigger 5338 an exception. */ 5339 sync_globals(s, i_allocated_regs); 5340 } 5341 5342 /* satisfy the output constraints */ 5343 for (k = 0; k < nb_oargs; k++) { 5344 i = args_ct[k].sort_index; 5345 arg = op->args[i]; 5346 arg_ct = &args_ct[i]; 5347 ts = arg_temp(arg); 5348 5349 /* ENV should not be modified. */ 5350 tcg_debug_assert(!temp_readonly(ts)); 5351 5352 switch (arg_ct->pair) { 5353 case 0: /* not paired */ 5354 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5355 reg = new_args[arg_ct->alias_index]; 5356 } else if (arg_ct->newreg) { 5357 reg = tcg_reg_alloc(s, arg_ct->regs, 5358 i_allocated_regs | o_allocated_regs, 5359 output_pref(op, k), ts->indirect_base); 5360 } else { 5361 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5362 output_pref(op, k), ts->indirect_base); 5363 } 5364 break; 5365 5366 case 1: /* first of pair */ 5367 if (arg_ct->oalias) { 5368 reg = new_args[arg_ct->alias_index]; 5369 } else if (arg_ct->newreg) { 5370 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5371 i_allocated_regs | o_allocated_regs, 5372 output_pref(op, k), 5373 ts->indirect_base); 5374 } else { 5375 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5376 output_pref(op, k), 5377 ts->indirect_base); 5378 } 5379 break; 5380 5381 case 2: /* second of pair */ 5382 if (arg_ct->oalias) { 5383 reg = new_args[arg_ct->alias_index]; 5384 } else { 5385 reg = new_args[arg_ct->pair_index] + 1; 5386 } 5387 break; 5388 5389 case 3: /* first of pair, aliasing with a second input */ 5390 tcg_debug_assert(!arg_ct->newreg); 5391 reg = new_args[arg_ct->pair_index] - 1; 5392 break; 5393 5394 default: 5395 g_assert_not_reached(); 5396 } 5397 tcg_regset_set_reg(o_allocated_regs, reg); 5398 set_temp_val_reg(s, ts, reg); 5399 ts->mem_coherent = 0; 5400 new_args[i] = reg; 5401 } 5402 } 5403 5404 /* emit instruction */ 5405 TCGType type = TCGOP_TYPE(op); 5406 switch (op->opc) { 5407 case INDEX_op_ext_i32_i64: 5408 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5409 break; 5410 case INDEX_op_extu_i32_i64: 5411 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5412 break; 5413 case INDEX_op_extrl_i64_i32: 5414 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5415 break; 5416 5417 case INDEX_op_add: 5418 case INDEX_op_and: 5419 case INDEX_op_andc: 5420 case INDEX_op_eqv: 5421 case INDEX_op_mul: 5422 case INDEX_op_mulsh: 5423 case INDEX_op_muluh: 5424 case INDEX_op_nand: 5425 case INDEX_op_nor: 5426 case INDEX_op_or: 5427 case INDEX_op_orc: 5428 case INDEX_op_xor: 5429 { 5430 const TCGOutOpBinary *out = 5431 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5432 5433 /* Constants should never appear in the first source operand. */ 5434 tcg_debug_assert(!const_args[1]); 5435 if (const_args[2]) { 5436 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5437 } else { 5438 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5439 } 5440 } 5441 break; 5442 5443 case INDEX_op_sub: 5444 { 5445 const TCGOutOpSubtract *out = &outop_sub; 5446 5447 /* 5448 * Constants should never appear in the second source operand. 5449 * These are folded to add with negative constant. 
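* For instance, a subtract of constant 8 will already have been rewritten as an add of -8, so the only constant form reaching here has the constant as the first operand (a reverse subtract), handled by out_rir below.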
5450 */ 5451 tcg_debug_assert(!const_args[2]); 5452 if (const_args[1]) { 5453 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5454 } else { 5455 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5456 } 5457 } 5458 break; 5459 5460 case INDEX_op_neg: 5461 case INDEX_op_not: 5462 { 5463 const TCGOutOpUnary *out = 5464 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5465 5466 /* Constants should have been folded. */ 5467 tcg_debug_assert(!const_args[1]); 5468 out->out_rr(s, type, new_args[0], new_args[1]); 5469 } 5470 break; 5471 5472 default: 5473 if (def->flags & TCG_OPF_VECTOR) { 5474 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5475 TCGOP_VECE(op), new_args, const_args); 5476 } else { 5477 tcg_out_op(s, op->opc, type, new_args, const_args); 5478 } 5479 break; 5480 } 5481 5482 /* move the outputs in the correct register if needed */ 5483 for(i = 0; i < nb_oargs; i++) { 5484 ts = arg_temp(op->args[i]); 5485 5486 /* ENV should not be modified. */ 5487 tcg_debug_assert(!temp_readonly(ts)); 5488 5489 if (NEED_SYNC_ARG(i)) { 5490 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5491 } else if (IS_DEAD_ARG(i)) { 5492 temp_dead(s, ts); 5493 } 5494 } 5495 } 5496 5497 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5498 { 5499 const TCGLifeData arg_life = op->life; 5500 TCGTemp *ots, *itsl, *itsh; 5501 TCGType vtype = TCGOP_TYPE(op); 5502 5503 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5504 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5505 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5506 5507 ots = arg_temp(op->args[0]); 5508 itsl = arg_temp(op->args[1]); 5509 itsh = arg_temp(op->args[2]); 5510 5511 /* ENV should not be modified. */ 5512 tcg_debug_assert(!temp_readonly(ots)); 5513 5514 /* Allocate the output register now. */ 5515 if (ots->val_type != TEMP_VAL_REG) { 5516 TCGRegSet allocated_regs = s->reserved_regs; 5517 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5518 TCGReg oreg; 5519 5520 /* Make sure to not spill the input registers. */ 5521 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5522 tcg_regset_set_reg(allocated_regs, itsl->reg); 5523 } 5524 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5525 tcg_regset_set_reg(allocated_regs, itsh->reg); 5526 } 5527 5528 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5529 output_pref(op, 0), ots->indirect_base); 5530 set_temp_val_reg(s, ots, oreg); 5531 } 5532 5533 /* Promote dup2 of immediates to dupi_vec. */ 5534 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5535 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5536 MemOp vece = MO_64; 5537 5538 if (val == dup_const(MO_8, val)) { 5539 vece = MO_8; 5540 } else if (val == dup_const(MO_16, val)) { 5541 vece = MO_16; 5542 } else if (val == dup_const(MO_32, val)) { 5543 vece = MO_32; 5544 } 5545 5546 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5547 goto done; 5548 } 5549 5550 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5551 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5552 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5553 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5554 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5555 5556 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5557 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5558 5559 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5560 its->mem_base->reg, its->mem_offset)) { 5561 goto done; 5562 } 5563 } 5564 5565 /* Fall back to generic expansion. 
*/ 5566 return false; 5567 5568 done: 5569 ots->mem_coherent = 0; 5570 if (IS_DEAD_ARG(1)) { 5571 temp_dead(s, itsl); 5572 } 5573 if (IS_DEAD_ARG(2)) { 5574 temp_dead(s, itsh); 5575 } 5576 if (NEED_SYNC_ARG(0)) { 5577 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5578 } else if (IS_DEAD_ARG(0)) { 5579 temp_dead(s, ots); 5580 } 5581 return true; 5582 } 5583 5584 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5585 TCGRegSet allocated_regs) 5586 { 5587 if (ts->val_type == TEMP_VAL_REG) { 5588 if (ts->reg != reg) { 5589 tcg_reg_free(s, reg, allocated_regs); 5590 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5591 /* 5592 * Cross register class move not supported. Sync the 5593 * temp back to its slot and load from there. 5594 */ 5595 temp_sync(s, ts, allocated_regs, 0, 0); 5596 tcg_out_ld(s, ts->type, reg, 5597 ts->mem_base->reg, ts->mem_offset); 5598 } 5599 } 5600 } else { 5601 TCGRegSet arg_set = 0; 5602 5603 tcg_reg_free(s, reg, allocated_regs); 5604 tcg_regset_set_reg(arg_set, reg); 5605 temp_load(s, ts, arg_set, allocated_regs, 0); 5606 } 5607 } 5608 5609 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5610 TCGRegSet allocated_regs) 5611 { 5612 /* 5613 * When the destination is on the stack, load up the temp and store. 5614 * If there are many call-saved registers, the temp might live to 5615 * see another use; otherwise it'll be discarded. 5616 */ 5617 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5618 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5619 arg_slot_stk_ofs(arg_slot)); 5620 } 5621 5622 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5623 TCGTemp *ts, TCGRegSet *allocated_regs) 5624 { 5625 if (arg_slot_reg_p(l->arg_slot)) { 5626 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5627 load_arg_reg(s, reg, ts, *allocated_regs); 5628 tcg_regset_set_reg(*allocated_regs, reg); 5629 } else { 5630 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5631 } 5632 } 5633 5634 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5635 intptr_t ref_off, TCGRegSet *allocated_regs) 5636 { 5637 TCGReg reg; 5638 5639 if (arg_slot_reg_p(arg_slot)) { 5640 reg = tcg_target_call_iarg_regs[arg_slot]; 5641 tcg_reg_free(s, reg, *allocated_regs); 5642 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5643 tcg_regset_set_reg(*allocated_regs, reg); 5644 } else { 5645 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5646 *allocated_regs, 0, false); 5647 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5648 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5649 arg_slot_stk_ofs(arg_slot)); 5650 } 5651 } 5652 5653 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5654 { 5655 const int nb_oargs = TCGOP_CALLO(op); 5656 const int nb_iargs = TCGOP_CALLI(op); 5657 const TCGLifeData arg_life = op->life; 5658 const TCGHelperInfo *info = tcg_call_info(op); 5659 TCGRegSet allocated_regs = s->reserved_regs; 5660 int i; 5661 5662 /* 5663 * Move inputs into place in reverse order, 5664 * so that we place stacked arguments first. 
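* (Staging a stack argument may need a scratch register via temp_load; filling the fixed argument registers last keeps those registers free while the stack slots are written.)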
5665 */ 5666 for (i = nb_iargs - 1; i >= 0; --i) { 5667 const TCGCallArgumentLoc *loc = &info->in[i]; 5668 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5669 5670 switch (loc->kind) { 5671 case TCG_CALL_ARG_NORMAL: 5672 case TCG_CALL_ARG_EXTEND_U: 5673 case TCG_CALL_ARG_EXTEND_S: 5674 load_arg_normal(s, loc, ts, &allocated_regs); 5675 break; 5676 case TCG_CALL_ARG_BY_REF: 5677 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5678 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5679 arg_slot_stk_ofs(loc->ref_slot), 5680 &allocated_regs); 5681 break; 5682 case TCG_CALL_ARG_BY_REF_N: 5683 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5684 break; 5685 default: 5686 g_assert_not_reached(); 5687 } 5688 } 5689 5690 /* Mark dead temporaries and free the associated registers. */ 5691 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5692 if (IS_DEAD_ARG(i)) { 5693 temp_dead(s, arg_temp(op->args[i])); 5694 } 5695 } 5696 5697 /* Clobber call registers. */ 5698 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5699 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5700 tcg_reg_free(s, i, allocated_regs); 5701 } 5702 } 5703 5704 /* 5705 * Save globals if they might be written by the helper, 5706 * sync them if they might be read. 5707 */ 5708 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5709 /* Nothing to do */ 5710 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5711 sync_globals(s, allocated_regs); 5712 } else { 5713 save_globals(s, allocated_regs); 5714 } 5715 5716 /* 5717 * If the ABI passes a pointer to the returned struct as the first 5718 * argument, load that now. Pass a pointer to the output home slot. 5719 */ 5720 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5721 TCGTemp *ts = arg_temp(op->args[0]); 5722 5723 if (!ts->mem_allocated) { 5724 temp_allocate_frame(s, ts); 5725 } 5726 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5727 } 5728 5729 tcg_out_call(s, tcg_call_func(op), info); 5730 5731 /* Assign output registers and emit moves if needed. */ 5732 switch (info->out_kind) { 5733 case TCG_CALL_RET_NORMAL: 5734 for (i = 0; i < nb_oargs; i++) { 5735 TCGTemp *ts = arg_temp(op->args[i]); 5736 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5737 5738 /* ENV should not be modified. */ 5739 tcg_debug_assert(!temp_readonly(ts)); 5740 5741 set_temp_val_reg(s, ts, reg); 5742 ts->mem_coherent = 0; 5743 } 5744 break; 5745 5746 case TCG_CALL_RET_BY_VEC: 5747 { 5748 TCGTemp *ts = arg_temp(op->args[0]); 5749 5750 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5751 tcg_debug_assert(ts->temp_subindex == 0); 5752 if (!ts->mem_allocated) { 5753 temp_allocate_frame(s, ts); 5754 } 5755 tcg_out_st(s, TCG_TYPE_V128, 5756 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5757 ts->mem_base->reg, ts->mem_offset); 5758 } 5759 /* fall through to mark all parts in memory */ 5760 5761 case TCG_CALL_RET_BY_REF: 5762 /* The callee has performed a write through the reference. */ 5763 for (i = 0; i < nb_oargs; i++) { 5764 TCGTemp *ts = arg_temp(op->args[i]); 5765 ts->val_type = TEMP_VAL_MEM; 5766 } 5767 break; 5768 5769 default: 5770 g_assert_not_reached(); 5771 } 5772 5773 /* Flush or discard output registers as needed. 
*/ 5774 for (i = 0; i < nb_oargs; i++) { 5775 TCGTemp *ts = arg_temp(op->args[i]); 5776 if (NEED_SYNC_ARG(i)) { 5777 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5778 } else if (IS_DEAD_ARG(i)) { 5779 temp_dead(s, ts); 5780 } 5781 } 5782 } 5783 5784 /** 5785 * atom_and_align_for_opc: 5786 * @s: tcg context 5787 * @opc: memory operation code 5788 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5789 * @allow_two_ops: true if we are prepared to issue two operations 5790 * 5791 * Return the alignment and atomicity to use for the inline fast path 5792 * for the given memory operation. The alignment may be larger than 5793 * that specified in @opc, and the correct alignment will be diagnosed 5794 * by the slow path helper. 5795 * 5796 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5797 * and issue two loads or stores for subalignment. 5798 */ 5799 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5800 MemOp host_atom, bool allow_two_ops) 5801 { 5802 MemOp align = memop_alignment_bits(opc); 5803 MemOp size = opc & MO_SIZE; 5804 MemOp half = size ? size - 1 : 0; 5805 MemOp atom = opc & MO_ATOM_MASK; 5806 MemOp atmax; 5807 5808 switch (atom) { 5809 case MO_ATOM_NONE: 5810 /* The operation requires no specific atomicity. */ 5811 atmax = MO_8; 5812 break; 5813 5814 case MO_ATOM_IFALIGN: 5815 atmax = size; 5816 break; 5817 5818 case MO_ATOM_IFALIGN_PAIR: 5819 atmax = half; 5820 break; 5821 5822 case MO_ATOM_WITHIN16: 5823 atmax = size; 5824 if (size == MO_128) { 5825 /* Misalignment implies !within16, and therefore no atomicity. */ 5826 } else if (host_atom != MO_ATOM_WITHIN16) { 5827 /* The host does not implement within16, so require alignment. */ 5828 align = MAX(align, size); 5829 } 5830 break; 5831 5832 case MO_ATOM_WITHIN16_PAIR: 5833 atmax = size; 5834 /* 5835 * Misalignment implies !within16, and therefore half atomicity. 5836 * Any host prepared for two operations can implement this with 5837 * half alignment. 5838 */ 5839 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5840 align = MAX(align, half); 5841 } 5842 break; 5843 5844 case MO_ATOM_SUBALIGN: 5845 atmax = size; 5846 if (host_atom != MO_ATOM_SUBALIGN) { 5847 /* If unaligned but not odd, there are subobjects up to half. */ 5848 if (allow_two_ops) { 5849 align = MAX(align, half); 5850 } else { 5851 align = MAX(align, size); 5852 } 5853 } 5854 break; 5855 5856 default: 5857 g_assert_not_reached(); 5858 } 5859 5860 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5861 } 5862 5863 /* 5864 * Similarly for qemu_ld/st slow path helpers. 5865 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5866 * using only the provided backend tcg_out_* functions. 5867 */ 5868 5869 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5870 { 5871 int ofs = arg_slot_stk_ofs(slot); 5872 5873 /* 5874 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5875 * require extension to uint64_t, adjust the address for uint32_t. 5876 */ 5877 if (HOST_BIG_ENDIAN && 5878 TCG_TARGET_REG_BITS == 64 && 5879 type == TCG_TYPE_I32) { 5880 ofs += 4; 5881 } 5882 return ofs; 5883 } 5884 5885 static void tcg_out_helper_load_slots(TCGContext *s, 5886 unsigned nmov, TCGMovExtend *mov, 5887 const TCGLdstHelperParam *parm) 5888 { 5889 unsigned i; 5890 TCGReg dst3; 5891 5892 /* 5893 * Start from the end, storing to the stack first. 5894 * This frees those registers, so we need not consider overlap. 
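* Argument slots are assigned in increasing order, so once a register slot is seen while scanning backwards, every remaining slot is a register as well; the found_reg early exit below relies on this.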
*/ 5896 for (i = nmov; i-- > 0; ) { 5897 unsigned slot = mov[i].dst; 5898 5899 if (arg_slot_reg_p(slot)) { 5900 goto found_reg; 5901 } 5902 5903 TCGReg src = mov[i].src; 5904 TCGType dst_type = mov[i].dst_type; 5905 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5906 5907 /* The argument is going onto the stack; extend into scratch. */ 5908 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5909 tcg_debug_assert(parm->ntmp != 0); 5910 mov[i].dst = src = parm->tmp[0]; 5911 tcg_out_movext1(s, &mov[i]); 5912 } 5913 5914 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5915 tcg_out_helper_stk_ofs(dst_type, slot)); 5916 } 5917 return; 5918 5919 found_reg: 5920 /* 5921 * The remaining arguments are in registers. 5922 * Convert slot numbers to argument registers. 5923 */ 5924 nmov = i + 1; 5925 for (i = 0; i < nmov; ++i) { 5926 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5927 } 5928 5929 switch (nmov) { 5930 case 4: 5931 /* The backend must have provided enough temps for the worst case. */ 5932 tcg_debug_assert(parm->ntmp >= 2); 5933 5934 dst3 = mov[3].dst; 5935 for (unsigned j = 0; j < 3; ++j) { 5936 if (dst3 == mov[j].src) { 5937 /* 5938 * Conflict. Copy the source to a temporary, perform the 5939 * remaining moves, then the extension from our scratch 5940 * on the way out. 5941 */ 5942 TCGReg scratch = parm->tmp[1]; 5943 5944 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5945 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5946 tcg_out_movext1_new_src(s, &mov[3], scratch); 5947 return; 5948 } 5949 } 5950 5951 /* No conflicts: perform this move and continue. */ 5952 tcg_out_movext1(s, &mov[3]); 5953 /* fall through */ 5954 5955 case 3: 5956 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5957 parm->ntmp ? parm->tmp[0] : -1); 5958 break; 5959 case 2: 5960 tcg_out_movext2(s, mov, mov + 1, 5961 parm->ntmp ? parm->tmp[0] : -1); 5962 break; 5963 case 1: 5964 tcg_out_movext1(s, mov); 5965 break; 5966 default: 5967 g_assert_not_reached(); 5968 } 5969 } 5970 5971 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5972 TCGType type, tcg_target_long imm, 5973 const TCGLdstHelperParam *parm) 5974 { 5975 if (arg_slot_reg_p(slot)) { 5976 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5977 } else { 5978 int ofs = tcg_out_helper_stk_ofs(type, slot); 5979 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5980 tcg_debug_assert(parm->ntmp != 0); 5981 tcg_out_movi(s, type, parm->tmp[0], imm); 5982 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5983 } 5984 } 5985 } 5986 5987 static void tcg_out_helper_load_common_args(TCGContext *s, 5988 const TCGLabelQemuLdst *ldst, 5989 const TCGLdstHelperParam *parm, 5990 const TCGHelperInfo *info, 5991 unsigned next_arg) 5992 { 5993 TCGMovExtend ptr_mov = { 5994 .dst_type = TCG_TYPE_PTR, 5995 .src_type = TCG_TYPE_PTR, 5996 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 5997 }; 5998 const TCGCallArgumentLoc *loc = &info->in[0]; 5999 TCGType type; 6000 unsigned slot; 6001 tcg_target_ulong imm; 6002 6003 /* 6004 * Handle env, which is always first. 6005 */ 6006 ptr_mov.dst = loc->arg_slot; 6007 ptr_mov.src = TCG_AREG0; 6008 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6009 6010 /* 6011 * Handle oi. 6012 */ 6013 imm = ldst->oi; 6014 loc = &info->in[next_arg]; 6015 type = TCG_TYPE_I32; 6016 switch (loc->kind) { 6017 case TCG_CALL_ARG_NORMAL: 6018 break; 6019 case TCG_CALL_ARG_EXTEND_U: 6020 case TCG_CALL_ARG_EXTEND_S: 6021 /* No extension required for MemOpIdx.
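The value already fits in 32 bits, as the assert below checks; TCG_TYPE_REG merely loads it as a plain host word.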
*/ 6022 tcg_debug_assert(imm <= INT32_MAX); 6023 type = TCG_TYPE_REG; 6024 break; 6025 default: 6026 g_assert_not_reached(); 6027 } 6028 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6029 next_arg++; 6030 6031 /* 6032 * Handle ra. 6033 */ 6034 loc = &info->in[next_arg]; 6035 slot = loc->arg_slot; 6036 if (parm->ra_gen) { 6037 int arg_reg = -1; 6038 TCGReg ra_reg; 6039 6040 if (arg_slot_reg_p(slot)) { 6041 arg_reg = tcg_target_call_iarg_regs[slot]; 6042 } 6043 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6044 6045 ptr_mov.dst = slot; 6046 ptr_mov.src = ra_reg; 6047 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6048 } else { 6049 imm = (uintptr_t)ldst->raddr; 6050 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6051 } 6052 } 6053 6054 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6055 const TCGCallArgumentLoc *loc, 6056 TCGType dst_type, TCGType src_type, 6057 TCGReg lo, TCGReg hi) 6058 { 6059 MemOp reg_mo; 6060 6061 if (dst_type <= TCG_TYPE_REG) { 6062 MemOp src_ext; 6063 6064 switch (loc->kind) { 6065 case TCG_CALL_ARG_NORMAL: 6066 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6067 break; 6068 case TCG_CALL_ARG_EXTEND_U: 6069 dst_type = TCG_TYPE_REG; 6070 src_ext = MO_UL; 6071 break; 6072 case TCG_CALL_ARG_EXTEND_S: 6073 dst_type = TCG_TYPE_REG; 6074 src_ext = MO_SL; 6075 break; 6076 default: 6077 g_assert_not_reached(); 6078 } 6079 6080 mov[0].dst = loc->arg_slot; 6081 mov[0].dst_type = dst_type; 6082 mov[0].src = lo; 6083 mov[0].src_type = src_type; 6084 mov[0].src_ext = src_ext; 6085 return 1; 6086 } 6087 6088 if (TCG_TARGET_REG_BITS == 32) { 6089 assert(dst_type == TCG_TYPE_I64); 6090 reg_mo = MO_32; 6091 } else { 6092 assert(dst_type == TCG_TYPE_I128); 6093 reg_mo = MO_64; 6094 } 6095 6096 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6097 mov[0].src = lo; 6098 mov[0].dst_type = TCG_TYPE_REG; 6099 mov[0].src_type = TCG_TYPE_REG; 6100 mov[0].src_ext = reg_mo; 6101 6102 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6103 mov[1].src = hi; 6104 mov[1].dst_type = TCG_TYPE_REG; 6105 mov[1].src_type = TCG_TYPE_REG; 6106 mov[1].src_ext = reg_mo; 6107 6108 return 2; 6109 } 6110 6111 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6112 const TCGLdstHelperParam *parm) 6113 { 6114 const TCGHelperInfo *info; 6115 const TCGCallArgumentLoc *loc; 6116 TCGMovExtend mov[2]; 6117 unsigned next_arg, nmov; 6118 MemOp mop = get_memop(ldst->oi); 6119 6120 switch (mop & MO_SIZE) { 6121 case MO_8: 6122 case MO_16: 6123 case MO_32: 6124 info = &info_helper_ld32_mmu; 6125 break; 6126 case MO_64: 6127 info = &info_helper_ld64_mmu; 6128 break; 6129 case MO_128: 6130 info = &info_helper_ld128_mmu; 6131 break; 6132 default: 6133 g_assert_not_reached(); 6134 } 6135 6136 /* Defer env argument. */ 6137 next_arg = 1; 6138 6139 loc = &info->in[next_arg]; 6140 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6141 /* 6142 * 32-bit host with 32-bit guest: zero-extend the guest address 6143 * to 64-bits for the helper by storing the low part, then 6144 * load a zero for the high part. 
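 * For example, on a little-endian host the 64-bit addr argument
 * occupies two adjacent slots: the guest address goes into loc[0]
 * and the constant zero into loc[1]; on a big-endian host the two
 * are swapped, which is what the HOST_BIG_ENDIAN indexing expresses.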
6145 */ 6146 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6147 TCG_TYPE_I32, TCG_TYPE_I32, 6148 ldst->addr_reg, -1); 6149 tcg_out_helper_load_slots(s, 1, mov, parm); 6150 6151 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6152 TCG_TYPE_I32, 0, parm); 6153 next_arg += 2; 6154 } else { 6155 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6156 ldst->addr_reg, -1); 6157 tcg_out_helper_load_slots(s, nmov, mov, parm); 6158 next_arg += nmov; 6159 } 6160 6161 switch (info->out_kind) { 6162 case TCG_CALL_RET_NORMAL: 6163 case TCG_CALL_RET_BY_VEC: 6164 break; 6165 case TCG_CALL_RET_BY_REF: 6166 /* 6167 * The return reference is in the first argument slot. 6168 * We need memory in which to return: re-use the top of stack. 6169 */ 6170 { 6171 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6172 6173 if (arg_slot_reg_p(0)) { 6174 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6175 TCG_REG_CALL_STACK, ofs_slot0); 6176 } else { 6177 tcg_debug_assert(parm->ntmp != 0); 6178 tcg_out_addi_ptr(s, parm->tmp[0], 6179 TCG_REG_CALL_STACK, ofs_slot0); 6180 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6181 TCG_REG_CALL_STACK, ofs_slot0); 6182 } 6183 } 6184 break; 6185 default: 6186 g_assert_not_reached(); 6187 } 6188 6189 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6190 } 6191 6192 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6193 bool load_sign, 6194 const TCGLdstHelperParam *parm) 6195 { 6196 MemOp mop = get_memop(ldst->oi); 6197 TCGMovExtend mov[2]; 6198 int ofs_slot0; 6199 6200 switch (ldst->type) { 6201 case TCG_TYPE_I64: 6202 if (TCG_TARGET_REG_BITS == 32) { 6203 break; 6204 } 6205 /* fall through */ 6206 6207 case TCG_TYPE_I32: 6208 mov[0].dst = ldst->datalo_reg; 6209 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6210 mov[0].dst_type = ldst->type; 6211 mov[0].src_type = TCG_TYPE_REG; 6212 6213 /* 6214 * If load_sign, then we allowed the helper to perform the 6215 * appropriate sign extension to tcg_target_ulong, and all 6216 * we need now is a plain move. 6217 * 6218 * If they do not, then we expect the relevant extension 6219 * instruction to be no more expensive than a move, and 6220 * we thus save the icache etc by only using one of two 6221 * helper functions. 
6222 */ 6223 if (load_sign || !(mop & MO_SIGN)) { 6224 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6225 mov[0].src_ext = MO_32; 6226 } else { 6227 mov[0].src_ext = MO_64; 6228 } 6229 } else { 6230 mov[0].src_ext = mop & MO_SSIZE; 6231 } 6232 tcg_out_movext1(s, mov); 6233 return; 6234 6235 case TCG_TYPE_I128: 6236 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6237 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6238 switch (TCG_TARGET_CALL_RET_I128) { 6239 case TCG_CALL_RET_NORMAL: 6240 break; 6241 case TCG_CALL_RET_BY_VEC: 6242 tcg_out_st(s, TCG_TYPE_V128, 6243 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6244 TCG_REG_CALL_STACK, ofs_slot0); 6245 /* fall through */ 6246 case TCG_CALL_RET_BY_REF: 6247 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6248 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6249 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6250 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6251 return; 6252 default: 6253 g_assert_not_reached(); 6254 } 6255 break; 6256 6257 default: 6258 g_assert_not_reached(); 6259 } 6260 6261 mov[0].dst = ldst->datalo_reg; 6262 mov[0].src = 6263 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6264 mov[0].dst_type = TCG_TYPE_REG; 6265 mov[0].src_type = TCG_TYPE_REG; 6266 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6267 6268 mov[1].dst = ldst->datahi_reg; 6269 mov[1].src = 6270 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6271 mov[1].dst_type = TCG_TYPE_REG; 6272 mov[1].src_type = TCG_TYPE_REG; 6273 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6274 6275 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1); 6276 } 6277 6278 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6279 const TCGLdstHelperParam *parm) 6280 { 6281 const TCGHelperInfo *info; 6282 const TCGCallArgumentLoc *loc; 6283 TCGMovExtend mov[4]; 6284 TCGType data_type; 6285 unsigned next_arg, nmov, n; 6286 MemOp mop = get_memop(ldst->oi); 6287 6288 switch (mop & MO_SIZE) { 6289 case MO_8: 6290 case MO_16: 6291 case MO_32: 6292 info = &info_helper_st32_mmu; 6293 data_type = TCG_TYPE_I32; 6294 break; 6295 case MO_64: 6296 info = &info_helper_st64_mmu; 6297 data_type = TCG_TYPE_I64; 6298 break; 6299 case MO_128: 6300 info = &info_helper_st128_mmu; 6301 data_type = TCG_TYPE_I128; 6302 break; 6303 default: 6304 g_assert_not_reached(); 6305 } 6306 6307 /* Defer env argument. */ 6308 next_arg = 1; 6309 nmov = 0; 6310 6311 /* Handle addr argument. */ 6312 loc = &info->in[next_arg]; 6313 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6314 if (TCG_TARGET_REG_BITS == 32) { 6315 /* 6316 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6317 * to 64-bits for the helper by storing the low part. Later, 6318 * after we have processed the register inputs, we will load a 6319 * zero for the high part. 6320 */ 6321 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6322 TCG_TYPE_I32, TCG_TYPE_I32, 6323 ldst->addr_reg, -1); 6324 next_arg += 2; 6325 nmov += 1; 6326 } else { 6327 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6328 ldst->addr_reg, -1); 6329 next_arg += n; 6330 nmov += n; 6331 } 6332 6333 /* Handle data argument. 
*/ 6334 loc = &info->in[next_arg]; 6335 switch (loc->kind) { 6336 case TCG_CALL_ARG_NORMAL: 6337 case TCG_CALL_ARG_EXTEND_U: 6338 case TCG_CALL_ARG_EXTEND_S: 6339 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6340 ldst->datalo_reg, ldst->datahi_reg); 6341 next_arg += n; 6342 nmov += n; 6343 tcg_out_helper_load_slots(s, nmov, mov, parm); 6344 break; 6345 6346 case TCG_CALL_ARG_BY_REF: 6347 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6348 tcg_debug_assert(data_type == TCG_TYPE_I128); 6349 tcg_out_st(s, TCG_TYPE_I64, 6350 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6351 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6352 tcg_out_st(s, TCG_TYPE_I64, 6353 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6354 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6355 6356 tcg_out_helper_load_slots(s, nmov, mov, parm); 6357 6358 if (arg_slot_reg_p(loc->arg_slot)) { 6359 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6360 TCG_REG_CALL_STACK, 6361 arg_slot_stk_ofs(loc->ref_slot)); 6362 } else { 6363 tcg_debug_assert(parm->ntmp != 0); 6364 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6365 arg_slot_stk_ofs(loc->ref_slot)); 6366 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6367 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6368 } 6369 next_arg += 2; 6370 break; 6371 6372 default: 6373 g_assert_not_reached(); 6374 } 6375 6376 if (TCG_TARGET_REG_BITS == 32) { 6377 /* Zero extend the address by loading a zero for the high part. */ 6378 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6379 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6380 } 6381 6382 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6383 } 6384 6385 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6386 { 6387 int i, start_words, num_insns; 6388 TCGOp *op; 6389 6390 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6391 && qemu_log_in_addr_range(pc_start))) { 6392 FILE *logfile = qemu_log_trylock(); 6393 if (logfile) { 6394 fprintf(logfile, "OP:\n"); 6395 tcg_dump_ops(s, logfile, false); 6396 fprintf(logfile, "\n"); 6397 qemu_log_unlock(logfile); 6398 } 6399 } 6400 6401 #ifdef CONFIG_DEBUG_TCG 6402 /* Ensure all labels referenced have been emitted. */ 6403 { 6404 TCGLabel *l; 6405 bool error = false; 6406 6407 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6408 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6409 qemu_log_mask(CPU_LOG_TB_OP, 6410 "$L%d referenced but not present.\n", l->id); 6411 error = true; 6412 } 6413 } 6414 assert(!error); 6415 } 6416 #endif 6417 6418 /* Do not reuse any EBB that may be allocated within the TB. */ 6419 tcg_temp_ebb_reset_freed(s); 6420 6421 tcg_optimize(s); 6422 6423 reachable_code_pass(s); 6424 liveness_pass_0(s); 6425 liveness_pass_1(s); 6426 6427 if (s->nb_indirects > 0) { 6428 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6429 && qemu_log_in_addr_range(pc_start))) { 6430 FILE *logfile = qemu_log_trylock(); 6431 if (logfile) { 6432 fprintf(logfile, "OP before indirect lowering:\n"); 6433 tcg_dump_ops(s, logfile, false); 6434 fprintf(logfile, "\n"); 6435 qemu_log_unlock(logfile); 6436 } 6437 } 6438 6439 /* Replace indirect temps with direct temps. */ 6440 if (liveness_pass_2(s)) { 6441 /* If changes were made, re-run liveness. 
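   Pass 2 replaces indirect temps with direct temps and may insert new
   loads and stores, so the dead/sync annotations computed by the first
   liveness pass are stale and must be recomputed.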
*/ 6442 liveness_pass_1(s); 6443 } 6444 } 6445 6446 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6447 && qemu_log_in_addr_range(pc_start))) { 6448 FILE *logfile = qemu_log_trylock(); 6449 if (logfile) { 6450 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6451 tcg_dump_ops(s, logfile, true); 6452 fprintf(logfile, "\n"); 6453 qemu_log_unlock(logfile); 6454 } 6455 } 6456 6457 /* Initialize goto_tb jump offsets. */ 6458 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6459 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6460 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6461 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6462 6463 tcg_reg_alloc_start(s); 6464 6465 /* 6466 * Reset the buffer pointers when restarting after overflow. 6467 * TODO: Move this into translate-all.c with the rest of the 6468 * buffer management. Having only this done here is confusing. 6469 */ 6470 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6471 s->code_ptr = s->code_buf; 6472 s->data_gen_ptr = NULL; 6473 6474 QSIMPLEQ_INIT(&s->ldst_labels); 6475 s->pool_labels = NULL; 6476 6477 start_words = s->insn_start_words; 6478 s->gen_insn_data = 6479 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6480 6481 tcg_out_tb_start(s); 6482 6483 num_insns = -1; 6484 QTAILQ_FOREACH(op, &s->ops, link) { 6485 TCGOpcode opc = op->opc; 6486 6487 switch (opc) { 6488 case INDEX_op_mov: 6489 case INDEX_op_mov_vec: 6490 tcg_reg_alloc_mov(s, op); 6491 break; 6492 case INDEX_op_dup_vec: 6493 tcg_reg_alloc_dup(s, op); 6494 break; 6495 case INDEX_op_insn_start: 6496 if (num_insns >= 0) { 6497 size_t off = tcg_current_code_size(s); 6498 s->gen_insn_end_off[num_insns] = off; 6499 /* Assert that we do not overflow our stored offset. */ 6500 assert(s->gen_insn_end_off[num_insns] == off); 6501 } 6502 num_insns++; 6503 for (i = 0; i < start_words; ++i) { 6504 s->gen_insn_data[num_insns * start_words + i] = 6505 tcg_get_insn_start_param(op, i); 6506 } 6507 break; 6508 case INDEX_op_discard: 6509 temp_dead(s, arg_temp(op->args[0])); 6510 break; 6511 case INDEX_op_set_label: 6512 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6513 tcg_out_label(s, arg_label(op->args[0])); 6514 break; 6515 case INDEX_op_call: 6516 tcg_reg_alloc_call(s, op); 6517 break; 6518 case INDEX_op_exit_tb: 6519 tcg_out_exit_tb(s, op->args[0]); 6520 break; 6521 case INDEX_op_goto_tb: 6522 tcg_out_goto_tb(s, op->args[0]); 6523 break; 6524 case INDEX_op_dup2_vec: 6525 if (tcg_reg_alloc_dup2(s, op)) { 6526 break; 6527 } 6528 /* fall through */ 6529 default: 6530 /* Sanity check that we've not introduced any unhandled opcodes. */ 6531 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6532 TCGOP_FLAGS(op))); 6533 /* Note: in order to speed up the code, it would be much 6534 faster to have specialized register allocator functions for 6535 some common argument patterns */ 6536 tcg_reg_alloc_op(s, op); 6537 break; 6538 } 6539 /* Test for (pending) buffer overflow. The assumption is that any 6540 one operation beginning below the high water mark cannot overrun 6541 the buffer completely. Thus we can test for overflow after 6542 generating code without having to check during generation. */ 6543 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6544 return -1; 6545 } 6546 /* Test for TB overflow, as seen by gen_insn_end_off. 
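   The per-insn end offsets are stored as 16-bit values, so the code
   generated for one TB must stay below 64KiB; returning -2 lets the
   caller retry with fewer guest instructions.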
*/ 6547 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6548 return -2; 6549 } 6550 } 6551 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6552 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6553 6554 /* Generate TB finalization at the end of block */ 6555 i = tcg_out_ldst_finalize(s); 6556 if (i < 0) { 6557 return i; 6558 } 6559 i = tcg_out_pool_finalize(s); 6560 if (i < 0) { 6561 return i; 6562 } 6563 if (!tcg_resolve_relocs(s)) { 6564 return -2; 6565 } 6566 6567 #ifndef CONFIG_TCG_INTERPRETER 6568 /* flush instruction cache */ 6569 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6570 (uintptr_t)s->code_buf, 6571 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6572 #endif 6573 6574 return tcg_current_code_size(s); 6575 } 6576 6577 #ifdef ELF_HOST_MACHINE 6578 /* In order to use this feature, the backend needs to do three things: 6579 6580 (1) Define ELF_HOST_MACHINE to indicate both what value to 6581 put into the ELF image and to indicate support for the feature. 6582 6583 (2) Define tcg_register_jit. This should create a buffer containing 6584 the contents of a .debug_frame section that describes the post- 6585 prologue unwind info for the tcg machine. 6586 6587 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6588 */ 6589 6590 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 6591 typedef enum { 6592 JIT_NOACTION = 0, 6593 JIT_REGISTER_FN, 6594 JIT_UNREGISTER_FN 6595 } jit_actions_t; 6596 6597 struct jit_code_entry { 6598 struct jit_code_entry *next_entry; 6599 struct jit_code_entry *prev_entry; 6600 const void *symfile_addr; 6601 uint64_t symfile_size; 6602 }; 6603 6604 struct jit_descriptor { 6605 uint32_t version; 6606 uint32_t action_flag; 6607 struct jit_code_entry *relevant_entry; 6608 struct jit_code_entry *first_entry; 6609 }; 6610 6611 void __jit_debug_register_code(void) __attribute__((noinline)); 6612 void __jit_debug_register_code(void) 6613 { 6614 asm(""); 6615 } 6616 6617 /* Must statically initialize the version, because GDB may check 6618 the version before we can set it. */ 6619 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6620 6621 /* End GDB interface. 
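   (Per the GDB manual's JIT compilation interface, the debugger sets a
   breakpoint in __jit_debug_register_code and re-reads
   __jit_debug_descriptor each time it is hit.)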
*/ 6622 6623 static int find_string(const char *strtab, const char *str) 6624 { 6625 const char *p = strtab + 1; 6626 6627 while (1) { 6628 if (strcmp(p, str) == 0) { 6629 return p - strtab; 6630 } 6631 p += strlen(p) + 1; 6632 } 6633 } 6634 6635 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6636 const void *debug_frame, 6637 size_t debug_frame_size) 6638 { 6639 struct __attribute__((packed)) DebugInfo { 6640 uint32_t len; 6641 uint16_t version; 6642 uint32_t abbrev; 6643 uint8_t ptr_size; 6644 uint8_t cu_die; 6645 uint16_t cu_lang; 6646 uintptr_t cu_low_pc; 6647 uintptr_t cu_high_pc; 6648 uint8_t fn_die; 6649 char fn_name[16]; 6650 uintptr_t fn_low_pc; 6651 uintptr_t fn_high_pc; 6652 uint8_t cu_eoc; 6653 }; 6654 6655 struct ElfImage { 6656 ElfW(Ehdr) ehdr; 6657 ElfW(Phdr) phdr; 6658 ElfW(Shdr) shdr[7]; 6659 ElfW(Sym) sym[2]; 6660 struct DebugInfo di; 6661 uint8_t da[24]; 6662 char str[80]; 6663 }; 6664 6665 struct ElfImage *img; 6666 6667 static const struct ElfImage img_template = { 6668 .ehdr = { 6669 .e_ident[EI_MAG0] = ELFMAG0, 6670 .e_ident[EI_MAG1] = ELFMAG1, 6671 .e_ident[EI_MAG2] = ELFMAG2, 6672 .e_ident[EI_MAG3] = ELFMAG3, 6673 .e_ident[EI_CLASS] = ELF_CLASS, 6674 .e_ident[EI_DATA] = ELF_DATA, 6675 .e_ident[EI_VERSION] = EV_CURRENT, 6676 .e_type = ET_EXEC, 6677 .e_machine = ELF_HOST_MACHINE, 6678 .e_version = EV_CURRENT, 6679 .e_phoff = offsetof(struct ElfImage, phdr), 6680 .e_shoff = offsetof(struct ElfImage, shdr), 6681 .e_ehsize = sizeof(ElfW(Shdr)), 6682 .e_phentsize = sizeof(ElfW(Phdr)), 6683 .e_phnum = 1, 6684 .e_shentsize = sizeof(ElfW(Shdr)), 6685 .e_shnum = ARRAY_SIZE(img->shdr), 6686 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6687 #ifdef ELF_HOST_FLAGS 6688 .e_flags = ELF_HOST_FLAGS, 6689 #endif 6690 #ifdef ELF_OSABI 6691 .e_ident[EI_OSABI] = ELF_OSABI, 6692 #endif 6693 }, 6694 .phdr = { 6695 .p_type = PT_LOAD, 6696 .p_flags = PF_X, 6697 }, 6698 .shdr = { 6699 [0] = { .sh_type = SHT_NULL }, 6700 /* Trick: The contents of code_gen_buffer are not present in 6701 this fake ELF file; that got allocated elsewhere. Therefore 6702 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6703 will not look for contents. We can record any address. 
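   For instance, dumping the image (see the DEBUG_JIT block below) and
   running readelf -S on it would show .text as NOBITS at the real
   code_gen_buffer address, while the .debug_* sections carry their
   contents inside this small image.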
*/ 6704 [1] = { /* .text */ 6705 .sh_type = SHT_NOBITS, 6706 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6707 }, 6708 [2] = { /* .debug_info */ 6709 .sh_type = SHT_PROGBITS, 6710 .sh_offset = offsetof(struct ElfImage, di), 6711 .sh_size = sizeof(struct DebugInfo), 6712 }, 6713 [3] = { /* .debug_abbrev */ 6714 .sh_type = SHT_PROGBITS, 6715 .sh_offset = offsetof(struct ElfImage, da), 6716 .sh_size = sizeof(img->da), 6717 }, 6718 [4] = { /* .debug_frame */ 6719 .sh_type = SHT_PROGBITS, 6720 .sh_offset = sizeof(struct ElfImage), 6721 }, 6722 [5] = { /* .symtab */ 6723 .sh_type = SHT_SYMTAB, 6724 .sh_offset = offsetof(struct ElfImage, sym), 6725 .sh_size = sizeof(img->sym), 6726 .sh_info = 1, 6727 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6728 .sh_entsize = sizeof(ElfW(Sym)), 6729 }, 6730 [6] = { /* .strtab */ 6731 .sh_type = SHT_STRTAB, 6732 .sh_offset = offsetof(struct ElfImage, str), 6733 .sh_size = sizeof(img->str), 6734 } 6735 }, 6736 .sym = { 6737 [1] = { /* code_gen_buffer */ 6738 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6739 .st_shndx = 1, 6740 } 6741 }, 6742 .di = { 6743 .len = sizeof(struct DebugInfo) - 4, 6744 .version = 2, 6745 .ptr_size = sizeof(void *), 6746 .cu_die = 1, 6747 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6748 .fn_die = 2, 6749 .fn_name = "code_gen_buffer" 6750 }, 6751 .da = { 6752 1, /* abbrev number (the cu) */ 6753 0x11, 1, /* DW_TAG_compile_unit, has children */ 6754 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6755 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6756 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6757 0, 0, /* end of abbrev */ 6758 2, /* abbrev number (the fn) */ 6759 0x2e, 0, /* DW_TAG_subprogram, no children */ 6760 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6761 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6762 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6763 0, 0, /* end of abbrev */ 6764 0 /* no more abbrev */ 6765 }, 6766 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6767 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 6768 }; 6769 6770 /* We only need a single jit entry; statically allocate it. */ 6771 static struct jit_code_entry one_entry; 6772 6773 uintptr_t buf = (uintptr_t)buf_ptr; 6774 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6775 DebugFrameHeader *dfh; 6776 6777 img = g_malloc(img_size); 6778 *img = img_template; 6779 6780 img->phdr.p_vaddr = buf; 6781 img->phdr.p_paddr = buf; 6782 img->phdr.p_memsz = buf_size; 6783 6784 img->shdr[1].sh_name = find_string(img->str, ".text"); 6785 img->shdr[1].sh_addr = buf; 6786 img->shdr[1].sh_size = buf_size; 6787 6788 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6789 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6790 6791 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6792 img->shdr[4].sh_size = debug_frame_size; 6793 6794 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6795 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6796 6797 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6798 img->sym[1].st_value = buf; 6799 img->sym[1].st_size = buf_size; 6800 6801 img->di.cu_low_pc = buf; 6802 img->di.cu_high_pc = buf + buf_size; 6803 img->di.fn_low_pc = buf; 6804 img->di.fn_high_pc = buf + buf_size; 6805 6806 dfh = (DebugFrameHeader *)(img + 1); 6807 memcpy(dfh, debug_frame, debug_frame_size); 6808 dfh->fde.func_start = buf; 6809 dfh->fde.func_len = buf_size; 6810 6811 #ifdef DEBUG_JIT 6812 /* Enable this block to be able to debug the ELF image file creation. 
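   The image is written to qemu.jit in the directory returned by
   g_get_tmp_dir().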
6813 One can use readelf, objdump, or other inspection utilities. */ 6814 { 6815 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6816 FILE *f = fopen(jit, "w+b"); 6817 if (f) { 6818 if (fwrite(img, img_size, 1, f) != img_size) { 6819 /* Avoid stupid unused return value warning for fwrite. */ 6820 } 6821 fclose(f); 6822 } 6823 } 6824 #endif 6825 6826 one_entry.symfile_addr = img; 6827 one_entry.symfile_size = img_size; 6828 6829 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6830 __jit_debug_descriptor.relevant_entry = &one_entry; 6831 __jit_debug_descriptor.first_entry = &one_entry; 6832 __jit_debug_register_code(); 6833 } 6834 #else 6835 /* No support for the feature. Provide the entry point expected by exec.c, 6836 and implement the internal function we declared earlier. */ 6837 6838 static void tcg_register_jit_int(const void *buf, size_t size, 6839 const void *debug_frame, 6840 size_t debug_frame_size) 6841 { 6842 } 6843 6844 void tcg_register_jit(const void *buf, size_t buf_size) 6845 { 6846 } 6847 #endif /* ELF_HOST_MACHINE */ 6848 6849 #if !TCG_TARGET_MAYBE_vec 6850 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6851 { 6852 g_assert_not_reached(); 6853 } 6854 #endif 6855