/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
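
/*
 * Typical usage (illustrative sketch only; the relocation type and the
 * branch emitter named here are hypothetical): when a backend emits a
 * branch to a label that is not yet bound, it records a relocation, later
 * binds the label, and finally all pending relocations are patched:
 *
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE_BRANCH, label, 0);
 *     tcg_out_branch_insn(s, 0);         // placeholder displacement
 *     ...
 *     tcg_out_label(s, label);           // bind at the current code_ptr
 *     ...
 *     tcg_resolve_relocs(s);             // patch_reloc() fixes each site
 */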
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
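
/*
 * Illustrative example (commentary only): a caller wanting to zero-extend
 * a 16-bit value held in one 64-bit register into another could use
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dest, TCG_TYPE_I64, MO_UW, src);
 *
 * which dispatches to tcg_out_ext16u() above.
 */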
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
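
/*
 * Worked example (commentary only): if @i1 moves R1 -> R0 and @i2 moves
 * R0 -> R1, then i1->dst == i2->src and i2->dst == i1->src.  With a host
 * xchg instruction the two registers are swapped in place and only the
 * extensions remain to be applied; without one, i1's source R1 is first
 * copied to @scratch so that it survives i2's move into R1.
 */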
/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}
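
/*
 * Illustrative sketch (commentary only; the relocation type and load
 * emitter are hypothetical): a backend that cannot encode a 64-bit
 * constant as an immediate may emit a pc-relative load whose target is
 * later laid out and patched by tcg_out_pool_finalize():
 *
 *     new_pool_label(s, value, R_EXAMPLE_PCREL, s->code_ptr, 0);
 *     tcg_out_pcrel_load_insn(s, dest_reg, 0);   // displacement patched later
 */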
/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
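
/*
 * Worked example (commentary only): given a hypothetical line
 *
 *     C_O1_I2(r, r, ri)
 *
 * in tcg-target-con-set.h, the first inclusion above adds the enumerator
 * c_o1_i2_r_r_ri to TCGConstraintSetIndex, the second inclusion adds the
 * entry { 1, 2, { "r", "r", "ri" } } to constraint_sets[], and the third
 * set of definitions below lets tcg_target_op_def() return the matching
 * enumerator for an opcode.
 */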
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
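
/*
 * Illustrative sketch (commentary only; the emitter names are hypothetical):
 * a backend's tcg-target.c.inc provides one such structure per opcode,
 * e.g. for INDEX_op_add something along the lines of
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add_reg,
 *         .out_rri = tgen_add_imm,
 *     };
 *
 * The register allocator then calls .out_rrr or .out_rri depending on
 * whether the second source operand satisfied a constant constraint.
 */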
/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */
1168 */ 1169 1170 #if TCG_TARGET_REG_BITS == 32 1171 # define dh_typecode_ttl dh_typecode_i32 1172 #else 1173 # define dh_typecode_ttl dh_typecode_i64 1174 #endif 1175 1176 static TCGHelperInfo info_helper_ld32_mmu = { 1177 .flags = TCG_CALL_NO_WG, 1178 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1179 | dh_typemask(env, 1) 1180 | dh_typemask(i64, 2) /* uint64_t addr */ 1181 | dh_typemask(i32, 3) /* unsigned oi */ 1182 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1183 }; 1184 1185 static TCGHelperInfo info_helper_ld64_mmu = { 1186 .flags = TCG_CALL_NO_WG, 1187 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1188 | dh_typemask(env, 1) 1189 | dh_typemask(i64, 2) /* uint64_t addr */ 1190 | dh_typemask(i32, 3) /* unsigned oi */ 1191 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1192 }; 1193 1194 static TCGHelperInfo info_helper_ld128_mmu = { 1195 .flags = TCG_CALL_NO_WG, 1196 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1197 | dh_typemask(env, 1) 1198 | dh_typemask(i64, 2) /* uint64_t addr */ 1199 | dh_typemask(i32, 3) /* unsigned oi */ 1200 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1201 }; 1202 1203 static TCGHelperInfo info_helper_st32_mmu = { 1204 .flags = TCG_CALL_NO_WG, 1205 .typemask = dh_typemask(void, 0) 1206 | dh_typemask(env, 1) 1207 | dh_typemask(i64, 2) /* uint64_t addr */ 1208 | dh_typemask(i32, 3) /* uint32_t data */ 1209 | dh_typemask(i32, 4) /* unsigned oi */ 1210 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1211 }; 1212 1213 static TCGHelperInfo info_helper_st64_mmu = { 1214 .flags = TCG_CALL_NO_WG, 1215 .typemask = dh_typemask(void, 0) 1216 | dh_typemask(env, 1) 1217 | dh_typemask(i64, 2) /* uint64_t addr */ 1218 | dh_typemask(i64, 3) /* uint64_t data */ 1219 | dh_typemask(i32, 4) /* unsigned oi */ 1220 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1221 }; 1222 1223 static TCGHelperInfo info_helper_st128_mmu = { 1224 .flags = TCG_CALL_NO_WG, 1225 .typemask = dh_typemask(void, 0) 1226 | dh_typemask(env, 1) 1227 | dh_typemask(i64, 2) /* uint64_t addr */ 1228 | dh_typemask(i128, 3) /* Int128 data */ 1229 | dh_typemask(i32, 4) /* unsigned oi */ 1230 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1231 }; 1232 1233 #ifdef CONFIG_TCG_INTERPRETER 1234 static ffi_type *typecode_to_ffi(int argmask) 1235 { 1236 /* 1237 * libffi does not support __int128_t, so we have forced Int128 1238 * to use the structure definition instead of the builtin type. 1239 */ 1240 static ffi_type *ffi_type_i128_elements[3] = { 1241 &ffi_type_uint64, 1242 &ffi_type_uint64, 1243 NULL 1244 }; 1245 static ffi_type ffi_type_i128 = { 1246 .size = 16, 1247 .alignment = __alignof__(Int128), 1248 .type = FFI_TYPE_STRUCT, 1249 .elements = ffi_type_i128_elements, 1250 }; 1251 1252 switch (argmask) { 1253 case dh_typecode_void: 1254 return &ffi_type_void; 1255 case dh_typecode_i32: 1256 return &ffi_type_uint32; 1257 case dh_typecode_s32: 1258 return &ffi_type_sint32; 1259 case dh_typecode_i64: 1260 return &ffi_type_uint64; 1261 case dh_typecode_s64: 1262 return &ffi_type_sint64; 1263 case dh_typecode_ptr: 1264 return &ffi_type_pointer; 1265 case dh_typecode_i128: 1266 return &ffi_type_i128; 1267 } 1268 g_assert_not_reached(); 1269 } 1270 1271 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1272 { 1273 unsigned typemask = info->typemask; 1274 struct { 1275 ffi_cif cif; 1276 ffi_type *args[]; 1277 } *ca; 1278 ffi_status status; 1279 int nargs; 1280 1281 /* Ignoring the return type, find the last non-zero field. 
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
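
/*
 * Illustrative example (commentary only, assuming a host with six integer
 * argument registers and an 8-byte tcg_target_long): argument slots 0..5
 * map onto tcg_target_call_iarg_regs[0..5] via arg_slot_reg_p(), while
 * slot 6 spills to the stack at TCG_TARGET_CALL_STACK_OFFSET + 0, slot 7
 * at TCG_TARGET_CALL_STACK_OFFSET + 8, and so on via arg_slot_stk_ofs().
 */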
1394 */ 1395 for (int i = 1; i < n; ++i) { 1396 loc[i] = (TCGCallArgumentLoc){ 1397 .kind = TCG_CALL_ARG_BY_REF_N, 1398 .arg_idx = cum->arg_idx, 1399 .tmp_subindex = i, 1400 .ref_slot = cum->ref_slot + i, 1401 }; 1402 } 1403 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1404 cum->ref_slot += n; 1405 } 1406 1407 static void init_call_layout(TCGHelperInfo *info) 1408 { 1409 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1410 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1411 unsigned typemask = info->typemask; 1412 unsigned typecode; 1413 TCGCumulativeArgs cum = { }; 1414 1415 /* 1416 * Parse and place any function return value. 1417 */ 1418 typecode = typemask & 7; 1419 switch (typecode) { 1420 case dh_typecode_void: 1421 info->nr_out = 0; 1422 break; 1423 case dh_typecode_i32: 1424 case dh_typecode_s32: 1425 case dh_typecode_ptr: 1426 info->nr_out = 1; 1427 info->out_kind = TCG_CALL_RET_NORMAL; 1428 break; 1429 case dh_typecode_i64: 1430 case dh_typecode_s64: 1431 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1432 info->out_kind = TCG_CALL_RET_NORMAL; 1433 /* Query the last register now to trigger any assert early. */ 1434 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1435 break; 1436 case dh_typecode_i128: 1437 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1438 info->out_kind = TCG_TARGET_CALL_RET_I128; 1439 switch (TCG_TARGET_CALL_RET_I128) { 1440 case TCG_CALL_RET_NORMAL: 1441 /* Query the last register now to trigger any assert early. */ 1442 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1443 break; 1444 case TCG_CALL_RET_BY_VEC: 1445 /* Query the single register now to trigger any assert early. */ 1446 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1447 break; 1448 case TCG_CALL_RET_BY_REF: 1449 /* 1450 * Allocate the first argument to the output. 1451 * We don't need to store this anywhere, just make it 1452 * unavailable for use in the input loop below. 1453 */ 1454 cum.arg_slot = 1; 1455 break; 1456 default: 1457 qemu_build_not_reached(); 1458 } 1459 break; 1460 default: 1461 g_assert_not_reached(); 1462 } 1463 1464 /* 1465 * Parse and place function arguments. 
1466 */ 1467 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1468 TCGCallArgumentKind kind; 1469 TCGType type; 1470 1471 typecode = typemask & 7; 1472 switch (typecode) { 1473 case dh_typecode_i32: 1474 case dh_typecode_s32: 1475 type = TCG_TYPE_I32; 1476 break; 1477 case dh_typecode_i64: 1478 case dh_typecode_s64: 1479 type = TCG_TYPE_I64; 1480 break; 1481 case dh_typecode_ptr: 1482 type = TCG_TYPE_PTR; 1483 break; 1484 case dh_typecode_i128: 1485 type = TCG_TYPE_I128; 1486 break; 1487 default: 1488 g_assert_not_reached(); 1489 } 1490 1491 switch (type) { 1492 case TCG_TYPE_I32: 1493 switch (TCG_TARGET_CALL_ARG_I32) { 1494 case TCG_CALL_ARG_EVEN: 1495 layout_arg_even(&cum); 1496 /* fall through */ 1497 case TCG_CALL_ARG_NORMAL: 1498 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1499 break; 1500 case TCG_CALL_ARG_EXTEND: 1501 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1502 layout_arg_1(&cum, info, kind); 1503 break; 1504 default: 1505 qemu_build_not_reached(); 1506 } 1507 break; 1508 1509 case TCG_TYPE_I64: 1510 switch (TCG_TARGET_CALL_ARG_I64) { 1511 case TCG_CALL_ARG_EVEN: 1512 layout_arg_even(&cum); 1513 /* fall through */ 1514 case TCG_CALL_ARG_NORMAL: 1515 if (TCG_TARGET_REG_BITS == 32) { 1516 layout_arg_normal_n(&cum, info, 2); 1517 } else { 1518 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1519 } 1520 break; 1521 default: 1522 qemu_build_not_reached(); 1523 } 1524 break; 1525 1526 case TCG_TYPE_I128: 1527 switch (TCG_TARGET_CALL_ARG_I128) { 1528 case TCG_CALL_ARG_EVEN: 1529 layout_arg_even(&cum); 1530 /* fall through */ 1531 case TCG_CALL_ARG_NORMAL: 1532 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1533 break; 1534 case TCG_CALL_ARG_BY_REF: 1535 layout_arg_by_ref(&cum, info); 1536 break; 1537 default: 1538 qemu_build_not_reached(); 1539 } 1540 break; 1541 1542 default: 1543 g_assert_not_reached(); 1544 } 1545 } 1546 info->nr_in = cum.info_in_idx; 1547 1548 /* Validate that we didn't overrun the input array. */ 1549 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1550 /* Validate the backend has enough argument space. */ 1551 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1552 1553 /* 1554 * Relocate the "ref_slot" area to the end of the parameters. 1555 * Minimizing this stack offset helps code size for x86, 1556 * which has a signed 8-bit offset encoding. 
1557 */ 1558 if (cum.ref_slot != 0) { 1559 int ref_base = 0; 1560 1561 if (cum.arg_slot > max_reg_slots) { 1562 int align = __alignof(Int128) / sizeof(tcg_target_long); 1563 1564 ref_base = cum.arg_slot - max_reg_slots; 1565 if (align > 1) { 1566 ref_base = ROUND_UP(ref_base, align); 1567 } 1568 } 1569 assert(ref_base + cum.ref_slot <= max_stk_slots); 1570 ref_base += max_reg_slots; 1571 1572 if (ref_base != 0) { 1573 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1574 TCGCallArgumentLoc *loc = &info->in[i]; 1575 switch (loc->kind) { 1576 case TCG_CALL_ARG_BY_REF: 1577 case TCG_CALL_ARG_BY_REF_N: 1578 loc->ref_slot += ref_base; 1579 break; 1580 default: 1581 break; 1582 } 1583 } 1584 } 1585 } 1586 } 1587 1588 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1589 static void process_constraint_sets(void); 1590 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1591 TCGReg reg, const char *name); 1592 1593 static void tcg_context_init(unsigned max_threads) 1594 { 1595 TCGContext *s = &tcg_init_ctx; 1596 int n, i; 1597 TCGTemp *ts; 1598 1599 memset(s, 0, sizeof(*s)); 1600 s->nb_globals = 0; 1601 1602 init_call_layout(&info_helper_ld32_mmu); 1603 init_call_layout(&info_helper_ld64_mmu); 1604 init_call_layout(&info_helper_ld128_mmu); 1605 init_call_layout(&info_helper_st32_mmu); 1606 init_call_layout(&info_helper_st64_mmu); 1607 init_call_layout(&info_helper_st128_mmu); 1608 1609 tcg_target_init(s); 1610 process_constraint_sets(); 1611 1612 /* Reverse the order of the saved registers, assuming they're all at 1613 the start of tcg_target_reg_alloc_order. */ 1614 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1615 int r = tcg_target_reg_alloc_order[n]; 1616 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1617 break; 1618 } 1619 } 1620 for (i = 0; i < n; ++i) { 1621 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1622 } 1623 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1624 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1625 } 1626 1627 tcg_ctx = s; 1628 /* 1629 * In user-mode we simply share the init context among threads, since we 1630 * use a single region. See the documentation tcg_region_init() for the 1631 * reasoning behind this. 1632 * In system-mode we will have at most max_threads TCG threads. 1633 */ 1634 #ifdef CONFIG_USER_ONLY 1635 tcg_ctxs = &tcg_ctx; 1636 tcg_cur_ctxs = 1; 1637 tcg_max_ctxs = 1; 1638 #else 1639 tcg_max_ctxs = max_threads; 1640 tcg_ctxs = g_new0(TCGContext *, max_threads); 1641 #endif 1642 1643 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1644 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1645 tcg_env = temp_tcgv_ptr(ts); 1646 } 1647 1648 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1649 { 1650 tcg_context_init(max_threads); 1651 tcg_region_init(tb_size, splitwx, max_threads); 1652 } 1653 1654 /* 1655 * Allocate TBs right before their corresponding translated code, making 1656 * sure that TBs and code are on different cache lines. 
1657 */ 1658 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1659 { 1660 uintptr_t align = qemu_icache_linesize; 1661 TranslationBlock *tb; 1662 void *next; 1663 1664 retry: 1665 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1666 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1667 1668 if (unlikely(next > s->code_gen_highwater)) { 1669 if (tcg_region_alloc(s)) { 1670 return NULL; 1671 } 1672 goto retry; 1673 } 1674 qatomic_set(&s->code_gen_ptr, next); 1675 return tb; 1676 } 1677 1678 void tcg_prologue_init(void) 1679 { 1680 TCGContext *s = tcg_ctx; 1681 size_t prologue_size; 1682 1683 s->code_ptr = s->code_gen_ptr; 1684 s->code_buf = s->code_gen_ptr; 1685 s->data_gen_ptr = NULL; 1686 1687 #ifndef CONFIG_TCG_INTERPRETER 1688 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1689 #endif 1690 1691 s->pool_labels = NULL; 1692 1693 qemu_thread_jit_write(); 1694 /* Generate the prologue. */ 1695 tcg_target_qemu_prologue(s); 1696 1697 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1698 { 1699 int result = tcg_out_pool_finalize(s); 1700 tcg_debug_assert(result == 0); 1701 } 1702 1703 prologue_size = tcg_current_code_size(s); 1704 perf_report_prologue(s->code_gen_ptr, prologue_size); 1705 1706 #ifndef CONFIG_TCG_INTERPRETER 1707 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1708 (uintptr_t)s->code_buf, prologue_size); 1709 #endif 1710 1711 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1712 FILE *logfile = qemu_log_trylock(); 1713 if (logfile) { 1714 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1715 if (s->data_gen_ptr) { 1716 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1717 size_t data_size = prologue_size - code_size; 1718 size_t i; 1719 1720 disas(logfile, s->code_gen_ptr, code_size); 1721 1722 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1723 if (sizeof(tcg_target_ulong) == 8) { 1724 fprintf(logfile, 1725 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1726 (uintptr_t)s->data_gen_ptr + i, 1727 *(uint64_t *)(s->data_gen_ptr + i)); 1728 } else { 1729 fprintf(logfile, 1730 "0x%08" PRIxPTR ": .long 0x%08x\n", 1731 (uintptr_t)s->data_gen_ptr + i, 1732 *(uint32_t *)(s->data_gen_ptr + i)); 1733 } 1734 } 1735 } else { 1736 disas(logfile, s->code_gen_ptr, prologue_size); 1737 } 1738 fprintf(logfile, "\n"); 1739 qemu_log_unlock(logfile); 1740 } 1741 } 1742 1743 #ifndef CONFIG_TCG_INTERPRETER 1744 /* 1745 * Assert that goto_ptr is implemented completely, setting an epilogue. 1746 * For tci, we use NULL as the signal to return from the interpreter, 1747 * so skip this check. 1748 */ 1749 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1750 #endif 1751 1752 tcg_region_prologue_set(s); 1753 } 1754 1755 void tcg_func_start(TCGContext *s) 1756 { 1757 tcg_pool_reset(s); 1758 s->nb_temps = s->nb_globals; 1759 1760 /* No temps have been previously allocated for size or locality. */ 1761 tcg_temp_ebb_reset_freed(s); 1762 1763 /* No constant temps have been previously allocated. 
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
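
/*
 * Illustrative usage (commentary only; the CPU state type and field are
 * hypothetical): target front ends create their globals at translator
 * initialization time with, for example,
 *
 *     cpu_reg = tcg_global_mem_new_i32(tcg_env,
 *                                      offsetof(CPUExampleState, reg),
 *                                      "reg");
 *
 * which ends up in tcg_global_mem_new_internal() below with the env
 * pointer as the base.
 */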
2 : 1); 1853 indirect_reg = 1; 1854 break; 1855 default: 1856 g_assert_not_reached(); 1857 } 1858 1859 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1860 TCGTemp *ts2 = tcg_global_alloc(s); 1861 char buf[64]; 1862 1863 ts->base_type = TCG_TYPE_I64; 1864 ts->type = TCG_TYPE_I32; 1865 ts->indirect_reg = indirect_reg; 1866 ts->mem_allocated = 1; 1867 ts->mem_base = base_ts; 1868 ts->mem_offset = offset; 1869 pstrcpy(buf, sizeof(buf), name); 1870 pstrcat(buf, sizeof(buf), "_0"); 1871 ts->name = strdup(buf); 1872 1873 tcg_debug_assert(ts2 == ts + 1); 1874 ts2->base_type = TCG_TYPE_I64; 1875 ts2->type = TCG_TYPE_I32; 1876 ts2->indirect_reg = indirect_reg; 1877 ts2->mem_allocated = 1; 1878 ts2->mem_base = base_ts; 1879 ts2->mem_offset = offset + 4; 1880 ts2->temp_subindex = 1; 1881 pstrcpy(buf, sizeof(buf), name); 1882 pstrcat(buf, sizeof(buf), "_1"); 1883 ts2->name = strdup(buf); 1884 } else { 1885 ts->base_type = type; 1886 ts->type = type; 1887 ts->indirect_reg = indirect_reg; 1888 ts->mem_allocated = 1; 1889 ts->mem_base = base_ts; 1890 ts->mem_offset = offset; 1891 ts->name = name; 1892 } 1893 return ts; 1894 } 1895 1896 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1897 { 1898 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1899 return temp_tcgv_i32(ts); 1900 } 1901 1902 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1903 { 1904 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1905 return temp_tcgv_i64(ts); 1906 } 1907 1908 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1909 { 1910 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1911 return temp_tcgv_ptr(ts); 1912 } 1913 1914 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1915 { 1916 TCGContext *s = tcg_ctx; 1917 TCGTemp *ts; 1918 int n; 1919 1920 if (kind == TEMP_EBB) { 1921 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1922 1923 if (idx < TCG_MAX_TEMPS) { 1924 /* There is already an available temp with the right type. 
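         * An EBB temp released by tcg_temp_free_internal() is parked in
         * the free_temps bitmap, so reuse the lowest free index instead
         * of growing nb_temps.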
*/ 1925 clear_bit(idx, s->free_temps[type].l); 1926 1927 ts = &s->temps[idx]; 1928 ts->temp_allocated = 1; 1929 tcg_debug_assert(ts->base_type == type); 1930 tcg_debug_assert(ts->kind == kind); 1931 return ts; 1932 } 1933 } else { 1934 tcg_debug_assert(kind == TEMP_TB); 1935 } 1936 1937 switch (type) { 1938 case TCG_TYPE_I32: 1939 case TCG_TYPE_V64: 1940 case TCG_TYPE_V128: 1941 case TCG_TYPE_V256: 1942 n = 1; 1943 break; 1944 case TCG_TYPE_I64: 1945 n = 64 / TCG_TARGET_REG_BITS; 1946 break; 1947 case TCG_TYPE_I128: 1948 n = 128 / TCG_TARGET_REG_BITS; 1949 break; 1950 default: 1951 g_assert_not_reached(); 1952 } 1953 1954 ts = tcg_temp_alloc(s); 1955 ts->base_type = type; 1956 ts->temp_allocated = 1; 1957 ts->kind = kind; 1958 1959 if (n == 1) { 1960 ts->type = type; 1961 } else { 1962 ts->type = TCG_TYPE_REG; 1963 1964 for (int i = 1; i < n; ++i) { 1965 TCGTemp *ts2 = tcg_temp_alloc(s); 1966 1967 tcg_debug_assert(ts2 == ts + i); 1968 ts2->base_type = type; 1969 ts2->type = TCG_TYPE_REG; 1970 ts2->temp_allocated = 1; 1971 ts2->temp_subindex = i; 1972 ts2->kind = kind; 1973 } 1974 } 1975 return ts; 1976 } 1977 1978 TCGv_i32 tcg_temp_new_i32(void) 1979 { 1980 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1981 } 1982 1983 TCGv_i32 tcg_temp_ebb_new_i32(void) 1984 { 1985 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1986 } 1987 1988 TCGv_i64 tcg_temp_new_i64(void) 1989 { 1990 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 1991 } 1992 1993 TCGv_i64 tcg_temp_ebb_new_i64(void) 1994 { 1995 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 1996 } 1997 1998 TCGv_ptr tcg_temp_new_ptr(void) 1999 { 2000 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2001 } 2002 2003 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2004 { 2005 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2006 } 2007 2008 TCGv_i128 tcg_temp_new_i128(void) 2009 { 2010 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2011 } 2012 2013 TCGv_i128 tcg_temp_ebb_new_i128(void) 2014 { 2015 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2016 } 2017 2018 TCGv_vec tcg_temp_new_vec(TCGType type) 2019 { 2020 TCGTemp *t; 2021 2022 #ifdef CONFIG_DEBUG_TCG 2023 switch (type) { 2024 case TCG_TYPE_V64: 2025 assert(TCG_TARGET_HAS_v64); 2026 break; 2027 case TCG_TYPE_V128: 2028 assert(TCG_TARGET_HAS_v128); 2029 break; 2030 case TCG_TYPE_V256: 2031 assert(TCG_TARGET_HAS_v256); 2032 break; 2033 default: 2034 g_assert_not_reached(); 2035 } 2036 #endif 2037 2038 t = tcg_temp_new_internal(type, TEMP_EBB); 2039 return temp_tcgv_vec(t); 2040 } 2041 2042 /* Create a new temp of the same type as an existing temp. */ 2043 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2044 { 2045 TCGTemp *t = tcgv_vec_temp(match); 2046 2047 tcg_debug_assert(t->temp_allocated != 0); 2048 2049 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2050 return temp_tcgv_vec(t); 2051 } 2052 2053 void tcg_temp_free_internal(TCGTemp *ts) 2054 { 2055 TCGContext *s = tcg_ctx; 2056 2057 switch (ts->kind) { 2058 case TEMP_CONST: 2059 case TEMP_TB: 2060 /* Silently ignore free. */ 2061 break; 2062 case TEMP_EBB: 2063 tcg_debug_assert(ts->temp_allocated != 0); 2064 ts->temp_allocated = 0; 2065 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2066 break; 2067 default: 2068 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
*/ 2069 g_assert_not_reached(); 2070 } 2071 } 2072 2073 void tcg_temp_free_i32(TCGv_i32 arg) 2074 { 2075 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2076 } 2077 2078 void tcg_temp_free_i64(TCGv_i64 arg) 2079 { 2080 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2081 } 2082 2083 void tcg_temp_free_i128(TCGv_i128 arg) 2084 { 2085 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2086 } 2087 2088 void tcg_temp_free_ptr(TCGv_ptr arg) 2089 { 2090 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2091 } 2092 2093 void tcg_temp_free_vec(TCGv_vec arg) 2094 { 2095 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2096 } 2097 2098 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2099 { 2100 TCGContext *s = tcg_ctx; 2101 GHashTable *h = s->const_table[type]; 2102 TCGTemp *ts; 2103 2104 if (h == NULL) { 2105 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2106 s->const_table[type] = h; 2107 } 2108 2109 ts = g_hash_table_lookup(h, &val); 2110 if (ts == NULL) { 2111 int64_t *val_ptr; 2112 2113 ts = tcg_temp_alloc(s); 2114 2115 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2116 TCGTemp *ts2 = tcg_temp_alloc(s); 2117 2118 tcg_debug_assert(ts2 == ts + 1); 2119 2120 ts->base_type = TCG_TYPE_I64; 2121 ts->type = TCG_TYPE_I32; 2122 ts->kind = TEMP_CONST; 2123 ts->temp_allocated = 1; 2124 2125 ts2->base_type = TCG_TYPE_I64; 2126 ts2->type = TCG_TYPE_I32; 2127 ts2->kind = TEMP_CONST; 2128 ts2->temp_allocated = 1; 2129 ts2->temp_subindex = 1; 2130 2131 /* 2132 * Retain the full value of the 64-bit constant in the low 2133 * part, so that the hash table works. Actual uses will 2134 * truncate the value to the low part. 2135 */ 2136 ts[HOST_BIG_ENDIAN].val = val; 2137 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2138 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2139 } else { 2140 ts->base_type = type; 2141 ts->type = type; 2142 ts->kind = TEMP_CONST; 2143 ts->temp_allocated = 1; 2144 ts->val = val; 2145 val_ptr = &ts->val; 2146 } 2147 g_hash_table_insert(h, val_ptr, ts); 2148 } 2149 2150 return ts; 2151 } 2152 2153 TCGv_i32 tcg_constant_i32(int32_t val) 2154 { 2155 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2156 } 2157 2158 TCGv_i64 tcg_constant_i64(int64_t val) 2159 { 2160 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2161 } 2162 2163 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2164 { 2165 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2166 } 2167 2168 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2169 { 2170 val = dup_const(vece, val); 2171 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2172 } 2173 2174 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2175 { 2176 TCGTemp *t = tcgv_vec_temp(match); 2177 2178 tcg_debug_assert(t->temp_allocated != 0); 2179 return tcg_constant_vec(t->base_type, vece, val); 2180 } 2181 2182 #ifdef CONFIG_DEBUG_TCG 2183 size_t temp_idx(TCGTemp *ts) 2184 { 2185 ptrdiff_t n = ts - tcg_ctx->temps; 2186 assert(n >= 0 && n < tcg_ctx->nb_temps); 2187 return n; 2188 } 2189 2190 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2191 { 2192 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2193 2194 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2195 assert(o % sizeof(TCGTemp) == 0); 2196 2197 return (void *)tcg_ctx + (uintptr_t)v; 2198 } 2199 #endif /* CONFIG_DEBUG_TCG */ 2200 2201 /* 2202 * Return true if OP may appear in the opcode stream with TYPE. 2203 * Test the runtime variable that controls each opcode. 
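 * For example, INDEX_op_rotli_vec is reported as supported only when
 * the requested vector type is available and the backend advertises
 * TCG_TARGET_HAS_roti_vec.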
2204 */ 2205 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2206 { 2207 bool has_type; 2208 2209 switch (type) { 2210 case TCG_TYPE_I32: 2211 has_type = true; 2212 break; 2213 case TCG_TYPE_I64: 2214 has_type = TCG_TARGET_REG_BITS == 64; 2215 break; 2216 case TCG_TYPE_V64: 2217 has_type = TCG_TARGET_HAS_v64; 2218 break; 2219 case TCG_TYPE_V128: 2220 has_type = TCG_TARGET_HAS_v128; 2221 break; 2222 case TCG_TYPE_V256: 2223 has_type = TCG_TARGET_HAS_v256; 2224 break; 2225 default: 2226 has_type = false; 2227 break; 2228 } 2229 2230 switch (op) { 2231 case INDEX_op_discard: 2232 case INDEX_op_set_label: 2233 case INDEX_op_call: 2234 case INDEX_op_br: 2235 case INDEX_op_mb: 2236 case INDEX_op_insn_start: 2237 case INDEX_op_exit_tb: 2238 case INDEX_op_goto_tb: 2239 case INDEX_op_goto_ptr: 2240 case INDEX_op_qemu_ld_i32: 2241 case INDEX_op_qemu_st_i32: 2242 case INDEX_op_qemu_ld_i64: 2243 case INDEX_op_qemu_st_i64: 2244 return true; 2245 2246 case INDEX_op_qemu_st8_i32: 2247 return TCG_TARGET_HAS_qemu_st8_i32; 2248 2249 case INDEX_op_qemu_ld_i128: 2250 case INDEX_op_qemu_st_i128: 2251 return TCG_TARGET_HAS_qemu_ldst_i128; 2252 2253 case INDEX_op_add: 2254 case INDEX_op_and: 2255 case INDEX_op_mov: 2256 case INDEX_op_or: 2257 case INDEX_op_xor: 2258 return has_type; 2259 2260 case INDEX_op_setcond_i32: 2261 case INDEX_op_brcond_i32: 2262 case INDEX_op_movcond_i32: 2263 case INDEX_op_ld8u_i32: 2264 case INDEX_op_ld8s_i32: 2265 case INDEX_op_ld16u_i32: 2266 case INDEX_op_ld16s_i32: 2267 case INDEX_op_ld_i32: 2268 case INDEX_op_st8_i32: 2269 case INDEX_op_st16_i32: 2270 case INDEX_op_st_i32: 2271 case INDEX_op_extract_i32: 2272 case INDEX_op_sextract_i32: 2273 case INDEX_op_deposit_i32: 2274 return true; 2275 2276 case INDEX_op_negsetcond_i32: 2277 return TCG_TARGET_HAS_negsetcond_i32; 2278 case INDEX_op_extract2_i32: 2279 return TCG_TARGET_HAS_extract2_i32; 2280 case INDEX_op_add2_i32: 2281 return TCG_TARGET_HAS_add2_i32; 2282 case INDEX_op_sub2_i32: 2283 return TCG_TARGET_HAS_sub2_i32; 2284 case INDEX_op_mulu2_i32: 2285 return TCG_TARGET_HAS_mulu2_i32; 2286 case INDEX_op_muls2_i32: 2287 return TCG_TARGET_HAS_muls2_i32; 2288 case INDEX_op_bswap16_i32: 2289 return TCG_TARGET_HAS_bswap16_i32; 2290 case INDEX_op_bswap32_i32: 2291 return TCG_TARGET_HAS_bswap32_i32; 2292 case INDEX_op_ctz_i32: 2293 return TCG_TARGET_HAS_ctz_i32; 2294 case INDEX_op_ctpop_i32: 2295 return TCG_TARGET_HAS_ctpop_i32; 2296 2297 case INDEX_op_brcond2_i32: 2298 case INDEX_op_setcond2_i32: 2299 return TCG_TARGET_REG_BITS == 32; 2300 2301 case INDEX_op_setcond_i64: 2302 case INDEX_op_brcond_i64: 2303 case INDEX_op_movcond_i64: 2304 case INDEX_op_ld8u_i64: 2305 case INDEX_op_ld8s_i64: 2306 case INDEX_op_ld16u_i64: 2307 case INDEX_op_ld16s_i64: 2308 case INDEX_op_ld32u_i64: 2309 case INDEX_op_ld32s_i64: 2310 case INDEX_op_ld_i64: 2311 case INDEX_op_st8_i64: 2312 case INDEX_op_st16_i64: 2313 case INDEX_op_st32_i64: 2314 case INDEX_op_st_i64: 2315 case INDEX_op_ext_i32_i64: 2316 case INDEX_op_extu_i32_i64: 2317 case INDEX_op_extract_i64: 2318 case INDEX_op_sextract_i64: 2319 case INDEX_op_deposit_i64: 2320 return TCG_TARGET_REG_BITS == 64; 2321 2322 case INDEX_op_negsetcond_i64: 2323 return TCG_TARGET_HAS_negsetcond_i64; 2324 case INDEX_op_extract2_i64: 2325 return TCG_TARGET_HAS_extract2_i64; 2326 case INDEX_op_extrl_i64_i32: 2327 case INDEX_op_extrh_i64_i32: 2328 return TCG_TARGET_HAS_extr_i64_i32; 2329 case INDEX_op_bswap16_i64: 2330 return TCG_TARGET_HAS_bswap16_i64; 2331 case INDEX_op_bswap32_i64: 2332 return 
TCG_TARGET_HAS_bswap32_i64; 2333 case INDEX_op_bswap64_i64: 2334 return TCG_TARGET_HAS_bswap64_i64; 2335 case INDEX_op_ctz_i64: 2336 return TCG_TARGET_HAS_ctz_i64; 2337 case INDEX_op_ctpop_i64: 2338 return TCG_TARGET_HAS_ctpop_i64; 2339 case INDEX_op_add2_i64: 2340 return TCG_TARGET_HAS_add2_i64; 2341 case INDEX_op_sub2_i64: 2342 return TCG_TARGET_HAS_sub2_i64; 2343 case INDEX_op_mulu2_i64: 2344 return TCG_TARGET_HAS_mulu2_i64; 2345 case INDEX_op_muls2_i64: 2346 return TCG_TARGET_HAS_muls2_i64; 2347 2348 case INDEX_op_mov_vec: 2349 case INDEX_op_dup_vec: 2350 case INDEX_op_dupm_vec: 2351 case INDEX_op_ld_vec: 2352 case INDEX_op_st_vec: 2353 case INDEX_op_add_vec: 2354 case INDEX_op_sub_vec: 2355 case INDEX_op_and_vec: 2356 case INDEX_op_or_vec: 2357 case INDEX_op_xor_vec: 2358 case INDEX_op_cmp_vec: 2359 return has_type; 2360 case INDEX_op_dup2_vec: 2361 return has_type && TCG_TARGET_REG_BITS == 32; 2362 case INDEX_op_not_vec: 2363 return has_type && TCG_TARGET_HAS_not_vec; 2364 case INDEX_op_neg_vec: 2365 return has_type && TCG_TARGET_HAS_neg_vec; 2366 case INDEX_op_abs_vec: 2367 return has_type && TCG_TARGET_HAS_abs_vec; 2368 case INDEX_op_andc_vec: 2369 return has_type && TCG_TARGET_HAS_andc_vec; 2370 case INDEX_op_orc_vec: 2371 return has_type && TCG_TARGET_HAS_orc_vec; 2372 case INDEX_op_nand_vec: 2373 return has_type && TCG_TARGET_HAS_nand_vec; 2374 case INDEX_op_nor_vec: 2375 return has_type && TCG_TARGET_HAS_nor_vec; 2376 case INDEX_op_eqv_vec: 2377 return has_type && TCG_TARGET_HAS_eqv_vec; 2378 case INDEX_op_mul_vec: 2379 return has_type && TCG_TARGET_HAS_mul_vec; 2380 case INDEX_op_shli_vec: 2381 case INDEX_op_shri_vec: 2382 case INDEX_op_sari_vec: 2383 return has_type && TCG_TARGET_HAS_shi_vec; 2384 case INDEX_op_shls_vec: 2385 case INDEX_op_shrs_vec: 2386 case INDEX_op_sars_vec: 2387 return has_type && TCG_TARGET_HAS_shs_vec; 2388 case INDEX_op_shlv_vec: 2389 case INDEX_op_shrv_vec: 2390 case INDEX_op_sarv_vec: 2391 return has_type && TCG_TARGET_HAS_shv_vec; 2392 case INDEX_op_rotli_vec: 2393 return has_type && TCG_TARGET_HAS_roti_vec; 2394 case INDEX_op_rotls_vec: 2395 return has_type && TCG_TARGET_HAS_rots_vec; 2396 case INDEX_op_rotlv_vec: 2397 case INDEX_op_rotrv_vec: 2398 return has_type && TCG_TARGET_HAS_rotv_vec; 2399 case INDEX_op_ssadd_vec: 2400 case INDEX_op_usadd_vec: 2401 case INDEX_op_sssub_vec: 2402 case INDEX_op_ussub_vec: 2403 return has_type && TCG_TARGET_HAS_sat_vec; 2404 case INDEX_op_smin_vec: 2405 case INDEX_op_umin_vec: 2406 case INDEX_op_smax_vec: 2407 case INDEX_op_umax_vec: 2408 return has_type && TCG_TARGET_HAS_minmax_vec; 2409 case INDEX_op_bitsel_vec: 2410 return has_type && TCG_TARGET_HAS_bitsel_vec; 2411 case INDEX_op_cmpsel_vec: 2412 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2413 2414 default: 2415 if (op < INDEX_op_last_generic) { 2416 const TCGOutOp *outop; 2417 TCGConstraintSetIndex con_set; 2418 2419 if (!has_type) { 2420 return false; 2421 } 2422 2423 outop = all_outop[op]; 2424 tcg_debug_assert(outop != NULL); 2425 2426 con_set = outop->static_constraint; 2427 if (con_set == C_Dynamic) { 2428 con_set = outop->dynamic_constraint(type, flags); 2429 } 2430 if (con_set >= 0) { 2431 return true; 2432 } 2433 tcg_debug_assert(con_set == C_NotImplemented); 2434 return false; 2435 } 2436 tcg_debug_assert(op < NB_OPS); 2437 return true; 2438 2439 case INDEX_op_last_generic: 2440 g_assert_not_reached(); 2441 } 2442 } 2443 2444 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2445 { 2446 unsigned width; 2447 2448 
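    /*
     * Illustrative bounds check for I32: a deposit of len=8 at ofs=24
     * fits the word (24 + 8 <= 32) and is passed on to the backend
     * hook, while ofs=28 with len=8 would spill past bit 31 and trip
     * the asserts below.
     */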
tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2449 width = (type == TCG_TYPE_I32 ? 32 : 64); 2450 2451 tcg_debug_assert(ofs < width); 2452 tcg_debug_assert(len > 0); 2453 tcg_debug_assert(len <= width - ofs); 2454 2455 return TCG_TARGET_deposit_valid(type, ofs, len); 2456 } 2457 2458 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2459 2460 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2461 TCGTemp *ret, TCGTemp **args) 2462 { 2463 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2464 int n_extend = 0; 2465 TCGOp *op; 2466 int i, n, pi = 0, total_args; 2467 2468 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2469 init_call_layout(info); 2470 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2471 } 2472 2473 total_args = info->nr_out + info->nr_in + 2; 2474 op = tcg_op_alloc(INDEX_op_call, total_args); 2475 2476 #ifdef CONFIG_PLUGIN 2477 /* Flag helpers that may affect guest state */ 2478 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2479 tcg_ctx->plugin_insn->calls_helpers = true; 2480 } 2481 #endif 2482 2483 TCGOP_CALLO(op) = n = info->nr_out; 2484 switch (n) { 2485 case 0: 2486 tcg_debug_assert(ret == NULL); 2487 break; 2488 case 1: 2489 tcg_debug_assert(ret != NULL); 2490 op->args[pi++] = temp_arg(ret); 2491 break; 2492 case 2: 2493 case 4: 2494 tcg_debug_assert(ret != NULL); 2495 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2496 tcg_debug_assert(ret->temp_subindex == 0); 2497 for (i = 0; i < n; ++i) { 2498 op->args[pi++] = temp_arg(ret + i); 2499 } 2500 break; 2501 default: 2502 g_assert_not_reached(); 2503 } 2504 2505 TCGOP_CALLI(op) = n = info->nr_in; 2506 for (i = 0; i < n; i++) { 2507 const TCGCallArgumentLoc *loc = &info->in[i]; 2508 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2509 2510 switch (loc->kind) { 2511 case TCG_CALL_ARG_NORMAL: 2512 case TCG_CALL_ARG_BY_REF: 2513 case TCG_CALL_ARG_BY_REF_N: 2514 op->args[pi++] = temp_arg(ts); 2515 break; 2516 2517 case TCG_CALL_ARG_EXTEND_U: 2518 case TCG_CALL_ARG_EXTEND_S: 2519 { 2520 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2521 TCGv_i32 orig = temp_tcgv_i32(ts); 2522 2523 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2524 tcg_gen_ext_i32_i64(temp, orig); 2525 } else { 2526 tcg_gen_extu_i32_i64(temp, orig); 2527 } 2528 op->args[pi++] = tcgv_i64_arg(temp); 2529 extend_free[n_extend++] = temp; 2530 } 2531 break; 2532 2533 default: 2534 g_assert_not_reached(); 2535 } 2536 } 2537 op->args[pi++] = (uintptr_t)func; 2538 op->args[pi++] = (uintptr_t)info; 2539 tcg_debug_assert(pi == total_args); 2540 2541 if (tcg_ctx->emit_before_op) { 2542 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2543 } else { 2544 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2545 } 2546 2547 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2548 for (i = 0; i < n_extend; ++i) { 2549 tcg_temp_free_i64(extend_free[i]); 2550 } 2551 } 2552 2553 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2554 { 2555 tcg_gen_callN(func, info, ret, NULL); 2556 } 2557 2558 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2559 { 2560 tcg_gen_callN(func, info, ret, &t1); 2561 } 2562 2563 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2564 TCGTemp *t1, TCGTemp *t2) 2565 { 2566 TCGTemp *args[2] = { t1, t2 }; 2567 tcg_gen_callN(func, info, ret, args); 2568 } 2569 2570 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2571 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2572 { 2573 TCGTemp *args[3] = 
{ t1, t2, t3 }; 2574 tcg_gen_callN(func, info, ret, args); 2575 } 2576 2577 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, 2578 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2579 { 2580 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2581 tcg_gen_callN(func, info, ret, args); 2582 } 2583 2584 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2585 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2586 { 2587 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2588 tcg_gen_callN(func, info, ret, args); 2589 } 2590 2591 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2592 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2593 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2594 { 2595 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2596 tcg_gen_callN(func, info, ret, args); 2597 } 2598 2599 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2600 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2601 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2602 { 2603 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2604 tcg_gen_callN(func, info, ret, args); 2605 } 2606 2607 static void tcg_reg_alloc_start(TCGContext *s) 2608 { 2609 int i, n; 2610 2611 for (i = 0, n = s->nb_temps; i < n; i++) { 2612 TCGTemp *ts = &s->temps[i]; 2613 TCGTempVal val = TEMP_VAL_MEM; 2614 2615 switch (ts->kind) { 2616 case TEMP_CONST: 2617 val = TEMP_VAL_CONST; 2618 break; 2619 case TEMP_FIXED: 2620 val = TEMP_VAL_REG; 2621 break; 2622 case TEMP_GLOBAL: 2623 break; 2624 case TEMP_EBB: 2625 val = TEMP_VAL_DEAD; 2626 /* fall through */ 2627 case TEMP_TB: 2628 ts->mem_allocated = 0; 2629 break; 2630 default: 2631 g_assert_not_reached(); 2632 } 2633 ts->val_type = val; 2634 } 2635 2636 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2637 } 2638 2639 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2640 TCGTemp *ts) 2641 { 2642 int idx = temp_idx(ts); 2643 2644 switch (ts->kind) { 2645 case TEMP_FIXED: 2646 case TEMP_GLOBAL: 2647 pstrcpy(buf, buf_size, ts->name); 2648 break; 2649 case TEMP_TB: 2650 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2651 break; 2652 case TEMP_EBB: 2653 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2654 break; 2655 case TEMP_CONST: 2656 switch (ts->type) { 2657 case TCG_TYPE_I32: 2658 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2659 break; 2660 #if TCG_TARGET_REG_BITS > 32 2661 case TCG_TYPE_I64: 2662 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2663 break; 2664 #endif 2665 case TCG_TYPE_V64: 2666 case TCG_TYPE_V128: 2667 case TCG_TYPE_V256: 2668 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2669 64 << (ts->type - TCG_TYPE_V64), ts->val); 2670 break; 2671 default: 2672 g_assert_not_reached(); 2673 } 2674 break; 2675 } 2676 return buf; 2677 } 2678 2679 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2680 int buf_size, TCGArg arg) 2681 { 2682 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2683 } 2684 2685 static const char * const cond_name[] = 2686 { 2687 [TCG_COND_NEVER] = "never", 2688 [TCG_COND_ALWAYS] = "always", 2689 [TCG_COND_EQ] = "eq", 2690 [TCG_COND_NE] = "ne", 2691 [TCG_COND_LT] = "lt", 2692 [TCG_COND_GE] = "ge", 2693 [TCG_COND_LE] = "le", 2694 [TCG_COND_GT] = "gt", 2695 [TCG_COND_LTU] = "ltu", 2696 [TCG_COND_GEU] = "geu", 2697 [TCG_COND_LEU] = "leu", 2698 [TCG_COND_GTU] = "gtu", 2699 [TCG_COND_TSTEQ] = "tsteq", 2700 [TCG_COND_TSTNE] = "tstne", 2701 }; 2702 2703 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2704 { 2705 [MO_UB] = "ub", 2706 [MO_SB] = "sb", 
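    /* Wider accesses combine endianness (le/be) with signedness and size. */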
2707 [MO_LEUW] = "leuw", 2708 [MO_LESW] = "lesw", 2709 [MO_LEUL] = "leul", 2710 [MO_LESL] = "lesl", 2711 [MO_LEUQ] = "leq", 2712 [MO_BEUW] = "beuw", 2713 [MO_BESW] = "besw", 2714 [MO_BEUL] = "beul", 2715 [MO_BESL] = "besl", 2716 [MO_BEUQ] = "beq", 2717 [MO_128 + MO_BE] = "beo", 2718 [MO_128 + MO_LE] = "leo", 2719 }; 2720 2721 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2722 [MO_UNALN >> MO_ASHIFT] = "un+", 2723 [MO_ALIGN >> MO_ASHIFT] = "al+", 2724 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2725 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2726 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2727 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2728 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2729 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2730 }; 2731 2732 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2733 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2734 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2735 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2736 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2737 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2738 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2739 }; 2740 2741 static const char bswap_flag_name[][6] = { 2742 [TCG_BSWAP_IZ] = "iz", 2743 [TCG_BSWAP_OZ] = "oz", 2744 [TCG_BSWAP_OS] = "os", 2745 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2746 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2747 }; 2748 2749 #ifdef CONFIG_PLUGIN 2750 static const char * const plugin_from_name[] = { 2751 "from-tb", 2752 "from-insn", 2753 "after-insn", 2754 "after-tb", 2755 }; 2756 #endif 2757 2758 static inline bool tcg_regset_single(TCGRegSet d) 2759 { 2760 return (d & (d - 1)) == 0; 2761 } 2762 2763 static inline TCGReg tcg_regset_first(TCGRegSet d) 2764 { 2765 if (TCG_TARGET_NB_REGS <= 32) { 2766 return ctz32(d); 2767 } else { 2768 return ctz64(d); 2769 } 2770 } 2771 2772 /* Return only the number of characters output -- no error return. */ 2773 #define ne_fprintf(...) \ 2774 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2775 2776 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2777 { 2778 char buf[128]; 2779 TCGOp *op; 2780 2781 QTAILQ_FOREACH(op, &s->ops, link) { 2782 int i, k, nb_oargs, nb_iargs, nb_cargs; 2783 const TCGOpDef *def; 2784 TCGOpcode c; 2785 int col = 0; 2786 2787 c = op->opc; 2788 def = &tcg_op_defs[c]; 2789 2790 if (c == INDEX_op_insn_start) { 2791 nb_oargs = 0; 2792 col += ne_fprintf(f, "\n ----"); 2793 2794 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2795 col += ne_fprintf(f, " %016" PRIx64, 2796 tcg_get_insn_start_param(op, i)); 2797 } 2798 } else if (c == INDEX_op_call) { 2799 const TCGHelperInfo *info = tcg_call_info(op); 2800 void *func = tcg_call_func(op); 2801 2802 /* variable number of arguments */ 2803 nb_oargs = TCGOP_CALLO(op); 2804 nb_iargs = TCGOP_CALLI(op); 2805 nb_cargs = def->nb_cargs; 2806 2807 col += ne_fprintf(f, " %s ", def->name); 2808 2809 /* 2810 * Print the function name from TCGHelperInfo, if available. 2811 * Note that plugins have a template function for the info, 2812 * but the actual function pointer comes from the plugin. 
2813 */ 2814 if (func == info->func) { 2815 col += ne_fprintf(f, "%s", info->name); 2816 } else { 2817 col += ne_fprintf(f, "plugin(%p)", func); 2818 } 2819 2820 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2821 for (i = 0; i < nb_oargs; i++) { 2822 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2823 op->args[i])); 2824 } 2825 for (i = 0; i < nb_iargs; i++) { 2826 TCGArg arg = op->args[nb_oargs + i]; 2827 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2828 col += ne_fprintf(f, ",%s", t); 2829 } 2830 } else { 2831 if (def->flags & TCG_OPF_INT) { 2832 col += ne_fprintf(f, " %s_i%d ", 2833 def->name, 2834 8 * tcg_type_size(TCGOP_TYPE(op))); 2835 } else if (def->flags & TCG_OPF_VECTOR) { 2836 col += ne_fprintf(f, "%s v%d,e%d,", 2837 def->name, 2838 8 * tcg_type_size(TCGOP_TYPE(op)), 2839 8 << TCGOP_VECE(op)); 2840 } else { 2841 col += ne_fprintf(f, " %s ", def->name); 2842 } 2843 2844 nb_oargs = def->nb_oargs; 2845 nb_iargs = def->nb_iargs; 2846 nb_cargs = def->nb_cargs; 2847 2848 k = 0; 2849 for (i = 0; i < nb_oargs; i++) { 2850 const char *sep = k ? "," : ""; 2851 col += ne_fprintf(f, "%s%s", sep, 2852 tcg_get_arg_str(s, buf, sizeof(buf), 2853 op->args[k++])); 2854 } 2855 for (i = 0; i < nb_iargs; i++) { 2856 const char *sep = k ? "," : ""; 2857 col += ne_fprintf(f, "%s%s", sep, 2858 tcg_get_arg_str(s, buf, sizeof(buf), 2859 op->args[k++])); 2860 } 2861 switch (c) { 2862 case INDEX_op_brcond_i32: 2863 case INDEX_op_setcond_i32: 2864 case INDEX_op_negsetcond_i32: 2865 case INDEX_op_movcond_i32: 2866 case INDEX_op_brcond2_i32: 2867 case INDEX_op_setcond2_i32: 2868 case INDEX_op_brcond_i64: 2869 case INDEX_op_setcond_i64: 2870 case INDEX_op_negsetcond_i64: 2871 case INDEX_op_movcond_i64: 2872 case INDEX_op_cmp_vec: 2873 case INDEX_op_cmpsel_vec: 2874 if (op->args[k] < ARRAY_SIZE(cond_name) 2875 && cond_name[op->args[k]]) { 2876 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2877 } else { 2878 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2879 } 2880 i = 1; 2881 break; 2882 case INDEX_op_qemu_ld_i32: 2883 case INDEX_op_qemu_st_i32: 2884 case INDEX_op_qemu_st8_i32: 2885 case INDEX_op_qemu_ld_i64: 2886 case INDEX_op_qemu_st_i64: 2887 case INDEX_op_qemu_ld_i128: 2888 case INDEX_op_qemu_st_i128: 2889 { 2890 const char *s_al, *s_op, *s_at; 2891 MemOpIdx oi = op->args[k++]; 2892 MemOp mop = get_memop(oi); 2893 unsigned ix = get_mmuidx(oi); 2894 2895 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2896 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2897 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2898 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2899 2900 /* If all fields are accounted for, print symbolically. 
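                 * e.g. an aligned little-endian 32-bit load with the
                 * default atomicity prints as "al+leul" plus the mmu
                 * index; any unaccounted bits fall back to raw hex.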
*/ 2901 if (!mop && s_al && s_op && s_at) { 2902 col += ne_fprintf(f, ",%s%s%s,%u", 2903 s_at, s_al, s_op, ix); 2904 } else { 2905 mop = get_memop(oi); 2906 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2907 } 2908 i = 1; 2909 } 2910 break; 2911 case INDEX_op_bswap16_i32: 2912 case INDEX_op_bswap16_i64: 2913 case INDEX_op_bswap32_i32: 2914 case INDEX_op_bswap32_i64: 2915 case INDEX_op_bswap64_i64: 2916 { 2917 TCGArg flags = op->args[k]; 2918 const char *name = NULL; 2919 2920 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2921 name = bswap_flag_name[flags]; 2922 } 2923 if (name) { 2924 col += ne_fprintf(f, ",%s", name); 2925 } else { 2926 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2927 } 2928 i = k = 1; 2929 } 2930 break; 2931 #ifdef CONFIG_PLUGIN 2932 case INDEX_op_plugin_cb: 2933 { 2934 TCGArg from = op->args[k++]; 2935 const char *name = NULL; 2936 2937 if (from < ARRAY_SIZE(plugin_from_name)) { 2938 name = plugin_from_name[from]; 2939 } 2940 if (name) { 2941 col += ne_fprintf(f, "%s", name); 2942 } else { 2943 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2944 } 2945 i = 1; 2946 } 2947 break; 2948 #endif 2949 default: 2950 i = 0; 2951 break; 2952 } 2953 switch (c) { 2954 case INDEX_op_set_label: 2955 case INDEX_op_br: 2956 case INDEX_op_brcond_i32: 2957 case INDEX_op_brcond_i64: 2958 case INDEX_op_brcond2_i32: 2959 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2960 arg_label(op->args[k])->id); 2961 i++, k++; 2962 break; 2963 case INDEX_op_mb: 2964 { 2965 TCGBar membar = op->args[k]; 2966 const char *b_op, *m_op; 2967 2968 switch (membar & TCG_BAR_SC) { 2969 case 0: 2970 b_op = "none"; 2971 break; 2972 case TCG_BAR_LDAQ: 2973 b_op = "acq"; 2974 break; 2975 case TCG_BAR_STRL: 2976 b_op = "rel"; 2977 break; 2978 case TCG_BAR_SC: 2979 b_op = "seq"; 2980 break; 2981 default: 2982 g_assert_not_reached(); 2983 } 2984 2985 switch (membar & TCG_MO_ALL) { 2986 case 0: 2987 m_op = "none"; 2988 break; 2989 case TCG_MO_LD_LD: 2990 m_op = "rr"; 2991 break; 2992 case TCG_MO_LD_ST: 2993 m_op = "rw"; 2994 break; 2995 case TCG_MO_ST_LD: 2996 m_op = "wr"; 2997 break; 2998 case TCG_MO_ST_ST: 2999 m_op = "ww"; 3000 break; 3001 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3002 m_op = "rr+rw"; 3003 break; 3004 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3005 m_op = "rr+wr"; 3006 break; 3007 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3008 m_op = "rr+ww"; 3009 break; 3010 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3011 m_op = "rw+wr"; 3012 break; 3013 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3014 m_op = "rw+ww"; 3015 break; 3016 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3017 m_op = "wr+ww"; 3018 break; 3019 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3020 m_op = "rr+rw+wr"; 3021 break; 3022 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3023 m_op = "rr+rw+ww"; 3024 break; 3025 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3026 m_op = "rr+wr+ww"; 3027 break; 3028 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3029 m_op = "rw+wr+ww"; 3030 break; 3031 case TCG_MO_ALL: 3032 m_op = "all"; 3033 break; 3034 default: 3035 g_assert_not_reached(); 3036 } 3037 3038 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3039 i++, k++; 3040 } 3041 break; 3042 default: 3043 break; 3044 } 3045 for (; i < nb_cargs; i++, k++) { 3046 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3047 op->args[k]); 3048 } 3049 } 3050 3051 if (have_prefs || op->life) { 3052 for (; col < 40; ++col) { 3053 putc(' ', f); 3054 } 3055 } 3056 3057 if (op->life) { 3058 unsigned life = op->life; 3059 3060 if (life & (SYNC_ARG * 3)) { 3061 ne_fprintf(f, " sync:"); 3062 for (i = 0; i < 2; ++i) { 3063 if (life & (SYNC_ARG << i)) { 3064 ne_fprintf(f, " %d", i); 3065 } 3066 } 3067 } 3068 life /= DEAD_ARG; 3069 if (life) { 3070 ne_fprintf(f, " dead:"); 3071 for (i = 0; life; ++i, life >>= 1) { 3072 if (life & 1) { 3073 ne_fprintf(f, " %d", i); 3074 } 3075 } 3076 } 3077 } 3078 3079 if (have_prefs) { 3080 for (i = 0; i < nb_oargs; ++i) { 3081 TCGRegSet set = output_pref(op, i); 3082 3083 if (i == 0) { 3084 ne_fprintf(f, " pref="); 3085 } else { 3086 ne_fprintf(f, ","); 3087 } 3088 if (set == 0) { 3089 ne_fprintf(f, "none"); 3090 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3091 ne_fprintf(f, "all"); 3092 #ifdef CONFIG_DEBUG_TCG 3093 } else if (tcg_regset_single(set)) { 3094 TCGReg reg = tcg_regset_first(set); 3095 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3096 #endif 3097 } else if (TCG_TARGET_NB_REGS <= 32) { 3098 ne_fprintf(f, "0x%x", (uint32_t)set); 3099 } else { 3100 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3101 } 3102 } 3103 } 3104 3105 putc('\n', f); 3106 } 3107 } 3108 3109 /* we give more priority to constraints with less registers */ 3110 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3111 { 3112 int n; 3113 3114 arg_ct += k; 3115 n = ctpop64(arg_ct->regs); 3116 3117 /* 3118 * Sort constraints of a single register first, which includes output 3119 * aliases (which must exactly match the input already allocated). 3120 */ 3121 if (n == 1 || arg_ct->oalias) { 3122 return INT_MAX; 3123 } 3124 3125 /* 3126 * Sort register pairs next, first then second immediately after. 3127 * Arbitrarily sort multiple pairs by the index of the first reg; 3128 * there shouldn't be many pairs. 3129 */ 3130 switch (arg_ct->pair) { 3131 case 1: 3132 case 3: 3133 return (k + 1) * 2; 3134 case 2: 3135 return (arg_ct->pair_index + 1) * 2 - 1; 3136 } 3137 3138 /* Finally, sort by decreasing register count. 
*/ 3139 assert(n > 1); 3140 return -n; 3141 } 3142 3143 /* sort from highest priority to lowest */ 3144 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3145 { 3146 int i, j; 3147 3148 for (i = 0; i < n; i++) { 3149 a[start + i].sort_index = start + i; 3150 } 3151 if (n <= 1) { 3152 return; 3153 } 3154 for (i = 0; i < n - 1; i++) { 3155 for (j = i + 1; j < n; j++) { 3156 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3157 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3158 if (p1 < p2) { 3159 int tmp = a[start + i].sort_index; 3160 a[start + i].sort_index = a[start + j].sort_index; 3161 a[start + j].sort_index = tmp; 3162 } 3163 } 3164 } 3165 } 3166 3167 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3168 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3169 3170 static void process_constraint_sets(void) 3171 { 3172 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3173 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3174 TCGArgConstraint *args_ct = all_cts[c]; 3175 int nb_oargs = tdefs->nb_oargs; 3176 int nb_iargs = tdefs->nb_iargs; 3177 int nb_args = nb_oargs + nb_iargs; 3178 bool saw_alias_pair = false; 3179 3180 for (int i = 0; i < nb_args; i++) { 3181 const char *ct_str = tdefs->args_ct_str[i]; 3182 bool input_p = i >= nb_oargs; 3183 int o; 3184 3185 switch (*ct_str) { 3186 case '0' ... '9': 3187 o = *ct_str - '0'; 3188 tcg_debug_assert(input_p); 3189 tcg_debug_assert(o < nb_oargs); 3190 tcg_debug_assert(args_ct[o].regs != 0); 3191 tcg_debug_assert(!args_ct[o].oalias); 3192 args_ct[i] = args_ct[o]; 3193 /* The output sets oalias. */ 3194 args_ct[o].oalias = 1; 3195 args_ct[o].alias_index = i; 3196 /* The input sets ialias. */ 3197 args_ct[i].ialias = 1; 3198 args_ct[i].alias_index = o; 3199 if (args_ct[i].pair) { 3200 saw_alias_pair = true; 3201 } 3202 tcg_debug_assert(ct_str[1] == '\0'); 3203 continue; 3204 3205 case '&': 3206 tcg_debug_assert(!input_p); 3207 args_ct[i].newreg = true; 3208 ct_str++; 3209 break; 3210 3211 case 'p': /* plus */ 3212 /* Allocate to the register after the previous. */ 3213 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3214 o = i - 1; 3215 tcg_debug_assert(!args_ct[o].pair); 3216 tcg_debug_assert(!args_ct[o].ct); 3217 args_ct[i] = (TCGArgConstraint){ 3218 .pair = 2, 3219 .pair_index = o, 3220 .regs = args_ct[o].regs << 1, 3221 .newreg = args_ct[o].newreg, 3222 }; 3223 args_ct[o].pair = 1; 3224 args_ct[o].pair_index = i; 3225 tcg_debug_assert(ct_str[1] == '\0'); 3226 continue; 3227 3228 case 'm': /* minus */ 3229 /* Allocate to the register before the previous. */ 3230 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3231 o = i - 1; 3232 tcg_debug_assert(!args_ct[o].pair); 3233 tcg_debug_assert(!args_ct[o].ct); 3234 args_ct[i] = (TCGArgConstraint){ 3235 .pair = 1, 3236 .pair_index = o, 3237 .regs = args_ct[o].regs >> 1, 3238 .newreg = args_ct[o].newreg, 3239 }; 3240 args_ct[o].pair = 2; 3241 args_ct[o].pair_index = i; 3242 tcg_debug_assert(ct_str[1] == '\0'); 3243 continue; 3244 } 3245 3246 do { 3247 switch (*ct_str) { 3248 case 'i': 3249 args_ct[i].ct |= TCG_CT_CONST; 3250 break; 3251 #ifdef TCG_REG_ZERO 3252 case 'z': 3253 args_ct[i].ct |= TCG_CT_REG_ZERO; 3254 break; 3255 #endif 3256 3257 /* Include all of the target-specific constraints. 
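             * Each such letter in tcg-target-con-str.h expands to a
             * CONST() or REGS() case via the macros just below, adding
             * either a constant flag or a register set for this arg.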
*/ 3258 3259 #undef CONST 3260 #define CONST(CASE, MASK) \ 3261 case CASE: args_ct[i].ct |= MASK; break; 3262 #define REGS(CASE, MASK) \ 3263 case CASE: args_ct[i].regs |= MASK; break; 3264 3265 #include "tcg-target-con-str.h" 3266 3267 #undef REGS 3268 #undef CONST 3269 default: 3270 case '0' ... '9': 3271 case '&': 3272 case 'p': 3273 case 'm': 3274 /* Typo in TCGConstraintSet constraint. */ 3275 g_assert_not_reached(); 3276 } 3277 } while (*++ct_str != '\0'); 3278 } 3279 3280 /* 3281 * Fix up output pairs that are aliased with inputs. 3282 * When we created the alias, we copied pair from the output. 3283 * There are three cases: 3284 * (1a) Pairs of inputs alias pairs of outputs. 3285 * (1b) One input aliases the first of a pair of outputs. 3286 * (2) One input aliases the second of a pair of outputs. 3287 * 3288 * Case 1a is handled by making sure that the pair_index'es are 3289 * properly updated so that they appear the same as a pair of inputs. 3290 * 3291 * Case 1b is handled by setting the pair_index of the input to 3292 * itself, simply so it doesn't point to an unrelated argument. 3293 * Since we don't encounter the "second" during the input allocation 3294 * phase, nothing happens with the second half of the input pair. 3295 * 3296 * Case 2 is handled by setting the second input to pair=3, the 3297 * first output to pair=3, and the pair_index'es to match. 3298 */ 3299 if (saw_alias_pair) { 3300 for (int i = nb_oargs; i < nb_args; i++) { 3301 int o, o2, i2; 3302 3303 /* 3304 * Since [0-9pm] must be alone in the constraint string, 3305 * the only way they can both be set is if the pair comes 3306 * from the output alias. 3307 */ 3308 if (!args_ct[i].ialias) { 3309 continue; 3310 } 3311 switch (args_ct[i].pair) { 3312 case 0: 3313 break; 3314 case 1: 3315 o = args_ct[i].alias_index; 3316 o2 = args_ct[o].pair_index; 3317 tcg_debug_assert(args_ct[o].pair == 1); 3318 tcg_debug_assert(args_ct[o2].pair == 2); 3319 if (args_ct[o2].oalias) { 3320 /* Case 1a */ 3321 i2 = args_ct[o2].alias_index; 3322 tcg_debug_assert(args_ct[i2].pair == 2); 3323 args_ct[i2].pair_index = i; 3324 args_ct[i].pair_index = i2; 3325 } else { 3326 /* Case 1b */ 3327 args_ct[i].pair_index = i; 3328 } 3329 break; 3330 case 2: 3331 o = args_ct[i].alias_index; 3332 o2 = args_ct[o].pair_index; 3333 tcg_debug_assert(args_ct[o].pair == 2); 3334 tcg_debug_assert(args_ct[o2].pair == 1); 3335 if (args_ct[o2].oalias) { 3336 /* Case 1a */ 3337 i2 = args_ct[o2].alias_index; 3338 tcg_debug_assert(args_ct[i2].pair == 1); 3339 args_ct[i2].pair_index = i; 3340 args_ct[i].pair_index = i2; 3341 } else { 3342 /* Case 2 */ 3343 args_ct[i].pair = 3; 3344 args_ct[o2].pair = 3; 3345 args_ct[i].pair_index = o2; 3346 args_ct[o2].pair_index = i; 3347 } 3348 break; 3349 default: 3350 g_assert_not_reached(); 3351 } 3352 } 3353 } 3354 3355 /* sort the constraints (XXX: this is just an heuristic) */ 3356 sort_constraints(args_ct, 0, nb_oargs); 3357 sort_constraints(args_ct, nb_oargs, nb_iargs); 3358 } 3359 } 3360 3361 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3362 { 3363 TCGOpcode opc = op->opc; 3364 TCGType type = TCGOP_TYPE(op); 3365 unsigned flags = TCGOP_FLAGS(op); 3366 const TCGOpDef *def = &tcg_op_defs[opc]; 3367 const TCGOutOp *outop = all_outop[opc]; 3368 TCGConstraintSetIndex con_set; 3369 3370 if (def->flags & TCG_OPF_NOT_PRESENT) { 3371 return empty_cts; 3372 } 3373 3374 if (outop) { 3375 con_set = outop->static_constraint; 3376 if (con_set == C_Dynamic) { 3377 con_set = outop->dynamic_constraint(type, flags); 
3378 } 3379 } else { 3380 con_set = tcg_target_op_def(opc, type, flags); 3381 } 3382 tcg_debug_assert(con_set >= 0); 3383 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3384 3385 /* The constraint arguments must match TCGOpcode arguments. */ 3386 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3387 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3388 3389 return all_cts[con_set]; 3390 } 3391 3392 static void remove_label_use(TCGOp *op, int idx) 3393 { 3394 TCGLabel *label = arg_label(op->args[idx]); 3395 TCGLabelUse *use; 3396 3397 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3398 if (use->op == op) { 3399 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3400 return; 3401 } 3402 } 3403 g_assert_not_reached(); 3404 } 3405 3406 void tcg_op_remove(TCGContext *s, TCGOp *op) 3407 { 3408 switch (op->opc) { 3409 case INDEX_op_br: 3410 remove_label_use(op, 0); 3411 break; 3412 case INDEX_op_brcond_i32: 3413 case INDEX_op_brcond_i64: 3414 remove_label_use(op, 3); 3415 break; 3416 case INDEX_op_brcond2_i32: 3417 remove_label_use(op, 5); 3418 break; 3419 default: 3420 break; 3421 } 3422 3423 QTAILQ_REMOVE(&s->ops, op, link); 3424 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3425 s->nb_ops--; 3426 } 3427 3428 void tcg_remove_ops_after(TCGOp *op) 3429 { 3430 TCGContext *s = tcg_ctx; 3431 3432 while (true) { 3433 TCGOp *last = tcg_last_op(); 3434 if (last == op) { 3435 return; 3436 } 3437 tcg_op_remove(s, last); 3438 } 3439 } 3440 3441 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3442 { 3443 TCGContext *s = tcg_ctx; 3444 TCGOp *op = NULL; 3445 3446 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3447 QTAILQ_FOREACH(op, &s->free_ops, link) { 3448 if (nargs <= op->nargs) { 3449 QTAILQ_REMOVE(&s->free_ops, op, link); 3450 nargs = op->nargs; 3451 goto found; 3452 } 3453 } 3454 } 3455 3456 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3457 nargs = MAX(4, nargs); 3458 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3459 3460 found: 3461 memset(op, 0, offsetof(TCGOp, link)); 3462 op->opc = opc; 3463 op->nargs = nargs; 3464 3465 /* Check for bitfield overflow. 
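     * op->nargs is a narrow bitfield, so storing the count and reading
     * it back detects silent truncation of an oversized argument list.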
*/ 3466 tcg_debug_assert(op->nargs == nargs); 3467 3468 s->nb_ops++; 3469 return op; 3470 } 3471 3472 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3473 { 3474 TCGOp *op = tcg_op_alloc(opc, nargs); 3475 3476 if (tcg_ctx->emit_before_op) { 3477 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3478 } else { 3479 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3480 } 3481 return op; 3482 } 3483 3484 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3485 TCGOpcode opc, TCGType type, unsigned nargs) 3486 { 3487 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3488 3489 TCGOP_TYPE(new_op) = type; 3490 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3491 return new_op; 3492 } 3493 3494 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3495 TCGOpcode opc, TCGType type, unsigned nargs) 3496 { 3497 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3498 3499 TCGOP_TYPE(new_op) = type; 3500 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3501 return new_op; 3502 } 3503 3504 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3505 { 3506 TCGLabelUse *u; 3507 3508 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3509 TCGOp *op = u->op; 3510 switch (op->opc) { 3511 case INDEX_op_br: 3512 op->args[0] = label_arg(to); 3513 break; 3514 case INDEX_op_brcond_i32: 3515 case INDEX_op_brcond_i64: 3516 op->args[3] = label_arg(to); 3517 break; 3518 case INDEX_op_brcond2_i32: 3519 op->args[5] = label_arg(to); 3520 break; 3521 default: 3522 g_assert_not_reached(); 3523 } 3524 } 3525 3526 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3527 } 3528 3529 /* Reachable analysis : remove unreachable code. */ 3530 static void __attribute__((noinline)) 3531 reachable_code_pass(TCGContext *s) 3532 { 3533 TCGOp *op, *op_next, *op_prev; 3534 bool dead = false; 3535 3536 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3537 bool remove = dead; 3538 TCGLabel *label; 3539 3540 switch (op->opc) { 3541 case INDEX_op_set_label: 3542 label = arg_label(op->args[0]); 3543 3544 /* 3545 * Note that the first op in the TB is always a load, 3546 * so there is always something before a label. 3547 */ 3548 op_prev = QTAILQ_PREV(op, link); 3549 3550 /* 3551 * If we find two sequential labels, move all branches to 3552 * reference the second label and remove the first label. 3553 * Do this before branch to next optimization, so that the 3554 * middle label is out of the way. 3555 */ 3556 if (op_prev->opc == INDEX_op_set_label) { 3557 move_label_uses(label, arg_label(op_prev->args[0])); 3558 tcg_op_remove(s, op_prev); 3559 op_prev = QTAILQ_PREV(op, link); 3560 } 3561 3562 /* 3563 * Optimization can fold conditional branches to unconditional. 3564 * If we find a label which is preceded by an unconditional 3565 * branch to next, remove the branch. We couldn't do this when 3566 * processing the branch because any dead code between the branch 3567 * and label had not yet been removed. 3568 */ 3569 if (op_prev->opc == INDEX_op_br && 3570 label == arg_label(op_prev->args[0])) { 3571 tcg_op_remove(s, op_prev); 3572 /* Fall through means insns become live again. */ 3573 dead = false; 3574 } 3575 3576 if (QSIMPLEQ_EMPTY(&label->branches)) { 3577 /* 3578 * While there is an occasional backward branch, virtually 3579 * all branches generated by the translators are forward. 3580 * Which means that generally we will have already removed 3581 * all references to the label that will be, and there is 3582 * little to be gained by iterating. 3583 */ 3584 remove = true; 3585 } else { 3586 /* Once we see a label, insns become live again. 
*/ 3587 dead = false; 3588 remove = false; 3589 } 3590 break; 3591 3592 case INDEX_op_br: 3593 case INDEX_op_exit_tb: 3594 case INDEX_op_goto_ptr: 3595 /* Unconditional branches; everything following is dead. */ 3596 dead = true; 3597 break; 3598 3599 case INDEX_op_call: 3600 /* Notice noreturn helper calls, raising exceptions. */ 3601 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3602 dead = true; 3603 } 3604 break; 3605 3606 case INDEX_op_insn_start: 3607 /* Never remove -- we need to keep these for unwind. */ 3608 remove = false; 3609 break; 3610 3611 default: 3612 break; 3613 } 3614 3615 if (remove) { 3616 tcg_op_remove(s, op); 3617 } 3618 } 3619 } 3620 3621 #define TS_DEAD 1 3622 #define TS_MEM 2 3623 3624 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3625 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3626 3627 /* For liveness_pass_1, the register preferences for a given temp. */ 3628 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3629 { 3630 return ts->state_ptr; 3631 } 3632 3633 /* For liveness_pass_1, reset the preferences for a given temp to the 3634 * maximal regset for its type. 3635 */ 3636 static inline void la_reset_pref(TCGTemp *ts) 3637 { 3638 *la_temp_pref(ts) 3639 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3640 } 3641 3642 /* liveness analysis: end of function: all temps are dead, and globals 3643 should be in memory. */ 3644 static void la_func_end(TCGContext *s, int ng, int nt) 3645 { 3646 int i; 3647 3648 for (i = 0; i < ng; ++i) { 3649 s->temps[i].state = TS_DEAD | TS_MEM; 3650 la_reset_pref(&s->temps[i]); 3651 } 3652 for (i = ng; i < nt; ++i) { 3653 s->temps[i].state = TS_DEAD; 3654 la_reset_pref(&s->temps[i]); 3655 } 3656 } 3657 3658 /* liveness analysis: end of basic block: all temps are dead, globals 3659 and local temps should be in memory. */ 3660 static void la_bb_end(TCGContext *s, int ng, int nt) 3661 { 3662 int i; 3663 3664 for (i = 0; i < nt; ++i) { 3665 TCGTemp *ts = &s->temps[i]; 3666 int state; 3667 3668 switch (ts->kind) { 3669 case TEMP_FIXED: 3670 case TEMP_GLOBAL: 3671 case TEMP_TB: 3672 state = TS_DEAD | TS_MEM; 3673 break; 3674 case TEMP_EBB: 3675 case TEMP_CONST: 3676 state = TS_DEAD; 3677 break; 3678 default: 3679 g_assert_not_reached(); 3680 } 3681 ts->state = state; 3682 la_reset_pref(ts); 3683 } 3684 } 3685 3686 /* liveness analysis: sync globals back to memory. */ 3687 static void la_global_sync(TCGContext *s, int ng) 3688 { 3689 int i; 3690 3691 for (i = 0; i < ng; ++i) { 3692 int state = s->temps[i].state; 3693 s->temps[i].state = state | TS_MEM; 3694 if (state == TS_DEAD) { 3695 /* If the global was previously dead, reset prefs. */ 3696 la_reset_pref(&s->temps[i]); 3697 } 3698 } 3699 } 3700 3701 /* 3702 * liveness analysis: conditional branch: all temps are dead unless 3703 * explicitly live-across-conditional-branch, globals and local temps 3704 * should be synced. 3705 */ 3706 static void la_bb_sync(TCGContext *s, int ng, int nt) 3707 { 3708 la_global_sync(s, ng); 3709 3710 for (int i = ng; i < nt; ++i) { 3711 TCGTemp *ts = &s->temps[i]; 3712 int state; 3713 3714 switch (ts->kind) { 3715 case TEMP_TB: 3716 state = ts->state; 3717 ts->state = state | TS_MEM; 3718 if (state != TS_DEAD) { 3719 continue; 3720 } 3721 break; 3722 case TEMP_EBB: 3723 case TEMP_CONST: 3724 continue; 3725 default: 3726 g_assert_not_reached(); 3727 } 3728 la_reset_pref(&s->temps[i]); 3729 } 3730 } 3731 3732 /* liveness analysis: sync globals back to memory and kill. 
*/ 3733 static void la_global_kill(TCGContext *s, int ng) 3734 { 3735 int i; 3736 3737 for (i = 0; i < ng; i++) { 3738 s->temps[i].state = TS_DEAD | TS_MEM; 3739 la_reset_pref(&s->temps[i]); 3740 } 3741 } 3742 3743 /* liveness analysis: note live globals crossing calls. */ 3744 static void la_cross_call(TCGContext *s, int nt) 3745 { 3746 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3747 int i; 3748 3749 for (i = 0; i < nt; i++) { 3750 TCGTemp *ts = &s->temps[i]; 3751 if (!(ts->state & TS_DEAD)) { 3752 TCGRegSet *pset = la_temp_pref(ts); 3753 TCGRegSet set = *pset; 3754 3755 set &= mask; 3756 /* If the combination is not possible, restart. */ 3757 if (set == 0) { 3758 set = tcg_target_available_regs[ts->type] & mask; 3759 } 3760 *pset = set; 3761 } 3762 } 3763 } 3764 3765 /* 3766 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3767 * to TEMP_EBB, if possible. 3768 */ 3769 static void __attribute__((noinline)) 3770 liveness_pass_0(TCGContext *s) 3771 { 3772 void * const multiple_ebb = (void *)(uintptr_t)-1; 3773 int nb_temps = s->nb_temps; 3774 TCGOp *op, *ebb; 3775 3776 for (int i = s->nb_globals; i < nb_temps; ++i) { 3777 s->temps[i].state_ptr = NULL; 3778 } 3779 3780 /* 3781 * Represent each EBB by the op at which it begins. In the case of 3782 * the first EBB, this is the first op, otherwise it is a label. 3783 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3784 * within a single EBB, else MULTIPLE_EBB. 3785 */ 3786 ebb = QTAILQ_FIRST(&s->ops); 3787 QTAILQ_FOREACH(op, &s->ops, link) { 3788 const TCGOpDef *def; 3789 int nb_oargs, nb_iargs; 3790 3791 switch (op->opc) { 3792 case INDEX_op_set_label: 3793 ebb = op; 3794 continue; 3795 case INDEX_op_discard: 3796 continue; 3797 case INDEX_op_call: 3798 nb_oargs = TCGOP_CALLO(op); 3799 nb_iargs = TCGOP_CALLI(op); 3800 break; 3801 default: 3802 def = &tcg_op_defs[op->opc]; 3803 nb_oargs = def->nb_oargs; 3804 nb_iargs = def->nb_iargs; 3805 break; 3806 } 3807 3808 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3809 TCGTemp *ts = arg_temp(op->args[i]); 3810 3811 if (ts->kind != TEMP_TB) { 3812 continue; 3813 } 3814 if (ts->state_ptr == NULL) { 3815 ts->state_ptr = ebb; 3816 } else if (ts->state_ptr != ebb) { 3817 ts->state_ptr = multiple_ebb; 3818 } 3819 } 3820 } 3821 3822 /* 3823 * For TEMP_TB that turned out not to be used beyond one EBB, 3824 * reduce the liveness to TEMP_EBB. 3825 */ 3826 for (int i = s->nb_globals; i < nb_temps; ++i) { 3827 TCGTemp *ts = &s->temps[i]; 3828 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3829 ts->kind = TEMP_EBB; 3830 } 3831 } 3832 } 3833 3834 /* Liveness analysis : update the opc_arg_life array to tell if a 3835 given input arguments is dead. Instructions updating dead 3836 temporaries are removed. */ 3837 static void __attribute__((noinline)) 3838 liveness_pass_1(TCGContext *s) 3839 { 3840 int nb_globals = s->nb_globals; 3841 int nb_temps = s->nb_temps; 3842 TCGOp *op, *op_prev; 3843 TCGRegSet *prefs; 3844 int i; 3845 3846 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3847 for (i = 0; i < nb_temps; ++i) { 3848 s->temps[i].state_ptr = prefs + i; 3849 } 3850 3851 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 3852 la_func_end(s, nb_globals, nb_temps); 3853 3854 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3855 int nb_iargs, nb_oargs; 3856 TCGOpcode opc_new, opc_new2; 3857 TCGLifeData arg_life = 0; 3858 TCGTemp *ts; 3859 TCGOpcode opc = op->opc; 3860 const TCGOpDef *def = &tcg_op_defs[opc]; 3861 const TCGArgConstraint *args_ct; 3862 3863 switch (opc) { 3864 case INDEX_op_call: 3865 { 3866 const TCGHelperInfo *info = tcg_call_info(op); 3867 int call_flags = tcg_call_flags(op); 3868 3869 nb_oargs = TCGOP_CALLO(op); 3870 nb_iargs = TCGOP_CALLI(op); 3871 3872 /* pure functions can be removed if their result is unused */ 3873 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3874 for (i = 0; i < nb_oargs; i++) { 3875 ts = arg_temp(op->args[i]); 3876 if (ts->state != TS_DEAD) { 3877 goto do_not_remove_call; 3878 } 3879 } 3880 goto do_remove; 3881 } 3882 do_not_remove_call: 3883 3884 /* Output args are dead. */ 3885 for (i = 0; i < nb_oargs; i++) { 3886 ts = arg_temp(op->args[i]); 3887 if (ts->state & TS_DEAD) { 3888 arg_life |= DEAD_ARG << i; 3889 } 3890 if (ts->state & TS_MEM) { 3891 arg_life |= SYNC_ARG << i; 3892 } 3893 ts->state = TS_DEAD; 3894 la_reset_pref(ts); 3895 } 3896 3897 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3898 memset(op->output_pref, 0, sizeof(op->output_pref)); 3899 3900 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3901 TCG_CALL_NO_READ_GLOBALS))) { 3902 la_global_kill(s, nb_globals); 3903 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3904 la_global_sync(s, nb_globals); 3905 } 3906 3907 /* Record arguments that die in this helper. */ 3908 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3909 ts = arg_temp(op->args[i]); 3910 if (ts->state & TS_DEAD) { 3911 arg_life |= DEAD_ARG << i; 3912 } 3913 } 3914 3915 /* For all live registers, remove call-clobbered prefs. */ 3916 la_cross_call(s, nb_temps); 3917 3918 /* 3919 * Input arguments are live for preceding opcodes. 3920 * 3921 * For those arguments that die, and will be allocated in 3922 * registers, clear the register set for that arg, to be 3923 * filled in below. For args that will be on the stack, 3924 * reset to any available reg. Process arguments in reverse 3925 * order so that if a temp is used more than once, the stack 3926 * reset to max happens before the register reset to 0. 3927 */ 3928 for (i = nb_iargs - 1; i >= 0; i--) { 3929 const TCGCallArgumentLoc *loc = &info->in[i]; 3930 ts = arg_temp(op->args[nb_oargs + i]); 3931 3932 if (ts->state & TS_DEAD) { 3933 switch (loc->kind) { 3934 case TCG_CALL_ARG_NORMAL: 3935 case TCG_CALL_ARG_EXTEND_U: 3936 case TCG_CALL_ARG_EXTEND_S: 3937 if (arg_slot_reg_p(loc->arg_slot)) { 3938 *la_temp_pref(ts) = 0; 3939 break; 3940 } 3941 /* fall through */ 3942 default: 3943 *la_temp_pref(ts) = 3944 tcg_target_available_regs[ts->type]; 3945 break; 3946 } 3947 ts->state &= ~TS_DEAD; 3948 } 3949 } 3950 3951 /* 3952 * For each input argument, add its input register to prefs. 3953 * If a temp is used once, this produces a single set bit; 3954 * if a temp is used multiple times, this produces a set. 
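             * e.g. a temp that feeds two register arguments of the same
             * call accumulates both tcg_target_call_iarg_regs[] entries
             * in its preference mask.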
3955 */ 3956 for (i = 0; i < nb_iargs; i++) { 3957 const TCGCallArgumentLoc *loc = &info->in[i]; 3958 ts = arg_temp(op->args[nb_oargs + i]); 3959 3960 switch (loc->kind) { 3961 case TCG_CALL_ARG_NORMAL: 3962 case TCG_CALL_ARG_EXTEND_U: 3963 case TCG_CALL_ARG_EXTEND_S: 3964 if (arg_slot_reg_p(loc->arg_slot)) { 3965 tcg_regset_set_reg(*la_temp_pref(ts), 3966 tcg_target_call_iarg_regs[loc->arg_slot]); 3967 } 3968 break; 3969 default: 3970 break; 3971 } 3972 } 3973 } 3974 break; 3975 case INDEX_op_insn_start: 3976 break; 3977 case INDEX_op_discard: 3978 /* mark the temporary as dead */ 3979 ts = arg_temp(op->args[0]); 3980 ts->state = TS_DEAD; 3981 la_reset_pref(ts); 3982 break; 3983 3984 case INDEX_op_add2_i32: 3985 case INDEX_op_add2_i64: 3986 opc_new = INDEX_op_add; 3987 goto do_addsub2; 3988 case INDEX_op_sub2_i32: 3989 case INDEX_op_sub2_i64: 3990 opc_new = INDEX_op_sub; 3991 do_addsub2: 3992 nb_iargs = 4; 3993 nb_oargs = 2; 3994 /* Test if the high part of the operation is dead, but not 3995 the low part. The result can be optimized to a simple 3996 add or sub. This happens often for x86_64 guest when the 3997 cpu mode is set to 32 bit. */ 3998 if (arg_temp(op->args[1])->state == TS_DEAD) { 3999 if (arg_temp(op->args[0])->state == TS_DEAD) { 4000 goto do_remove; 4001 } 4002 /* Replace the opcode and adjust the args in place, 4003 leaving 3 unused args at the end. */ 4004 op->opc = opc = opc_new; 4005 op->args[1] = op->args[2]; 4006 op->args[2] = op->args[4]; 4007 /* Fall through and mark the single-word operation live. */ 4008 nb_iargs = 2; 4009 nb_oargs = 1; 4010 } 4011 goto do_not_remove; 4012 4013 case INDEX_op_muls2_i32: 4014 case INDEX_op_muls2_i64: 4015 opc_new = INDEX_op_mul; 4016 opc_new2 = INDEX_op_mulsh; 4017 goto do_mul2; 4018 case INDEX_op_mulu2_i32: 4019 case INDEX_op_mulu2_i64: 4020 opc_new = INDEX_op_mul; 4021 opc_new2 = INDEX_op_muluh; 4022 do_mul2: 4023 nb_iargs = 2; 4024 nb_oargs = 2; 4025 if (arg_temp(op->args[1])->state == TS_DEAD) { 4026 if (arg_temp(op->args[0])->state == TS_DEAD) { 4027 /* Both parts of the operation are dead. */ 4028 goto do_remove; 4029 } 4030 /* The high part of the operation is dead; generate the low. */ 4031 op->opc = opc = opc_new; 4032 op->args[1] = op->args[2]; 4033 op->args[2] = op->args[3]; 4034 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4035 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4036 /* The low part of the operation is dead; generate the high. */ 4037 op->opc = opc = opc_new2; 4038 op->args[0] = op->args[1]; 4039 op->args[1] = op->args[2]; 4040 op->args[2] = op->args[3]; 4041 } else { 4042 goto do_not_remove; 4043 } 4044 /* Mark the single-word operation live. */ 4045 nb_oargs = 1; 4046 goto do_not_remove; 4047 4048 default: 4049 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4050 nb_iargs = def->nb_iargs; 4051 nb_oargs = def->nb_oargs; 4052 4053 /* Test if the operation can be removed because all 4054 its outputs are dead. We assume that nb_oargs == 0 4055 implies side effects */ 4056 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4057 for (i = 0; i < nb_oargs; i++) { 4058 if (arg_temp(op->args[i])->state != TS_DEAD) { 4059 goto do_not_remove; 4060 } 4061 } 4062 goto do_remove; 4063 } 4064 goto do_not_remove; 4065 4066 do_remove: 4067 tcg_op_remove(s, op); 4068 break; 4069 4070 do_not_remove: 4071 for (i = 0; i < nb_oargs; i++) { 4072 ts = arg_temp(op->args[i]); 4073 4074 /* Remember the preference of the uses that followed. 
*/ 4075 if (i < ARRAY_SIZE(op->output_pref)) { 4076 op->output_pref[i] = *la_temp_pref(ts); 4077 } 4078 4079 /* Output args are dead. */ 4080 if (ts->state & TS_DEAD) { 4081 arg_life |= DEAD_ARG << i; 4082 } 4083 if (ts->state & TS_MEM) { 4084 arg_life |= SYNC_ARG << i; 4085 } 4086 ts->state = TS_DEAD; 4087 la_reset_pref(ts); 4088 } 4089 4090 /* If end of basic block, update. */ 4091 if (def->flags & TCG_OPF_BB_EXIT) { 4092 la_func_end(s, nb_globals, nb_temps); 4093 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4094 la_bb_sync(s, nb_globals, nb_temps); 4095 } else if (def->flags & TCG_OPF_BB_END) { 4096 la_bb_end(s, nb_globals, nb_temps); 4097 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4098 la_global_sync(s, nb_globals); 4099 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4100 la_cross_call(s, nb_temps); 4101 } 4102 } 4103 4104 /* Record arguments that die in this opcode. */ 4105 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4106 ts = arg_temp(op->args[i]); 4107 if (ts->state & TS_DEAD) { 4108 arg_life |= DEAD_ARG << i; 4109 } 4110 } 4111 4112 /* Input arguments are live for preceding opcodes. */ 4113 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4114 ts = arg_temp(op->args[i]); 4115 if (ts->state & TS_DEAD) { 4116 /* For operands that were dead, initially allow 4117 all regs for the type. */ 4118 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4119 ts->state &= ~TS_DEAD; 4120 } 4121 } 4122 4123 /* Incorporate constraints for this operand. */ 4124 switch (opc) { 4125 case INDEX_op_mov: 4126 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4127 have proper constraints. That said, special case 4128 moves to propagate preferences backward. */ 4129 if (IS_DEAD_ARG(1)) { 4130 *la_temp_pref(arg_temp(op->args[0])) 4131 = *la_temp_pref(arg_temp(op->args[1])); 4132 } 4133 break; 4134 4135 default: 4136 args_ct = opcode_args_ct(op); 4137 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4138 const TCGArgConstraint *ct = &args_ct[i]; 4139 TCGRegSet set, *pset; 4140 4141 ts = arg_temp(op->args[i]); 4142 pset = la_temp_pref(ts); 4143 set = *pset; 4144 4145 set &= ct->regs; 4146 if (ct->ialias) { 4147 set &= output_pref(op, ct->alias_index); 4148 } 4149 /* If the combination is not possible, restart. */ 4150 if (set == 0) { 4151 set = ct->regs; 4152 } 4153 *pset = set; 4154 } 4155 break; 4156 } 4157 break; 4158 } 4159 op->life = arg_life; 4160 } 4161 } 4162 4163 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4164 static bool __attribute__((noinline)) 4165 liveness_pass_2(TCGContext *s) 4166 { 4167 int nb_globals = s->nb_globals; 4168 int nb_temps, i; 4169 bool changes = false; 4170 TCGOp *op, *op_next; 4171 4172 /* Create a temporary for each indirect global. */ 4173 for (i = 0; i < nb_globals; ++i) { 4174 TCGTemp *its = &s->temps[i]; 4175 if (its->indirect_reg) { 4176 TCGTemp *dts = tcg_temp_alloc(s); 4177 dts->type = its->type; 4178 dts->base_type = its->base_type; 4179 dts->temp_subindex = its->temp_subindex; 4180 dts->kind = TEMP_EBB; 4181 its->state_ptr = dts; 4182 } else { 4183 its->state_ptr = NULL; 4184 } 4185 /* All globals begin dead. 
*/ 4186 its->state = TS_DEAD; 4187 } 4188 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4189 TCGTemp *its = &s->temps[i]; 4190 its->state_ptr = NULL; 4191 its->state = TS_DEAD; 4192 } 4193 4194 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4195 TCGOpcode opc = op->opc; 4196 const TCGOpDef *def = &tcg_op_defs[opc]; 4197 TCGLifeData arg_life = op->life; 4198 int nb_iargs, nb_oargs, call_flags; 4199 TCGTemp *arg_ts, *dir_ts; 4200 4201 if (opc == INDEX_op_call) { 4202 nb_oargs = TCGOP_CALLO(op); 4203 nb_iargs = TCGOP_CALLI(op); 4204 call_flags = tcg_call_flags(op); 4205 } else { 4206 nb_iargs = def->nb_iargs; 4207 nb_oargs = def->nb_oargs; 4208 4209 /* Set flags similar to how calls require. */ 4210 if (def->flags & TCG_OPF_COND_BRANCH) { 4211 /* Like reading globals: sync_globals */ 4212 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4213 } else if (def->flags & TCG_OPF_BB_END) { 4214 /* Like writing globals: save_globals */ 4215 call_flags = 0; 4216 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4217 /* Like reading globals: sync_globals */ 4218 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4219 } else { 4220 /* No effect on globals. */ 4221 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4222 TCG_CALL_NO_WRITE_GLOBALS); 4223 } 4224 } 4225 4226 /* Make sure that input arguments are available. */ 4227 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4228 arg_ts = arg_temp(op->args[i]); 4229 dir_ts = arg_ts->state_ptr; 4230 if (dir_ts && arg_ts->state == TS_DEAD) { 4231 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4232 ? INDEX_op_ld_i32 4233 : INDEX_op_ld_i64); 4234 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4235 arg_ts->type, 3); 4236 4237 lop->args[0] = temp_arg(dir_ts); 4238 lop->args[1] = temp_arg(arg_ts->mem_base); 4239 lop->args[2] = arg_ts->mem_offset; 4240 4241 /* Loaded, but synced with memory. */ 4242 arg_ts->state = TS_MEM; 4243 } 4244 } 4245 4246 /* Perform input replacement, and mark inputs that became dead. 4247 No action is required except keeping temp_state up to date 4248 so that we reload when needed. */ 4249 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4250 arg_ts = arg_temp(op->args[i]); 4251 dir_ts = arg_ts->state_ptr; 4252 if (dir_ts) { 4253 op->args[i] = temp_arg(dir_ts); 4254 changes = true; 4255 if (IS_DEAD_ARG(i)) { 4256 arg_ts->state = TS_DEAD; 4257 } 4258 } 4259 } 4260 4261 /* Liveness analysis should ensure that the following are 4262 all correct, for call sites and basic block end points. */ 4263 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4264 /* Nothing to do */ 4265 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4266 for (i = 0; i < nb_globals; ++i) { 4267 /* Liveness should see that globals are synced back, 4268 that is, either TS_DEAD or TS_MEM. */ 4269 arg_ts = &s->temps[i]; 4270 tcg_debug_assert(arg_ts->state_ptr == 0 4271 || arg_ts->state != 0); 4272 } 4273 } else { 4274 for (i = 0; i < nb_globals; ++i) { 4275 /* Liveness should see that globals are saved back, 4276 that is, TS_DEAD, waiting to be reloaded. */ 4277 arg_ts = &s->temps[i]; 4278 tcg_debug_assert(arg_ts->state_ptr == 0 4279 || arg_ts->state == TS_DEAD); 4280 } 4281 } 4282 4283 /* Outputs become available. */ 4284 if (opc == INDEX_op_mov) { 4285 arg_ts = arg_temp(op->args[0]); 4286 dir_ts = arg_ts->state_ptr; 4287 if (dir_ts) { 4288 op->args[0] = temp_arg(dir_ts); 4289 changes = true; 4290 4291 /* The output is now live and modified. */ 4292 arg_ts->state = 0; 4293 4294 if (NEED_SYNC_ARG(0)) { 4295 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4296 ? 
INDEX_op_st_i32 4297 : INDEX_op_st_i64); 4298 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4299 arg_ts->type, 3); 4300 TCGTemp *out_ts = dir_ts; 4301 4302 if (IS_DEAD_ARG(0)) { 4303 out_ts = arg_temp(op->args[1]); 4304 arg_ts->state = TS_DEAD; 4305 tcg_op_remove(s, op); 4306 } else { 4307 arg_ts->state = TS_MEM; 4308 } 4309 4310 sop->args[0] = temp_arg(out_ts); 4311 sop->args[1] = temp_arg(arg_ts->mem_base); 4312 sop->args[2] = arg_ts->mem_offset; 4313 } else { 4314 tcg_debug_assert(!IS_DEAD_ARG(0)); 4315 } 4316 } 4317 } else { 4318 for (i = 0; i < nb_oargs; i++) { 4319 arg_ts = arg_temp(op->args[i]); 4320 dir_ts = arg_ts->state_ptr; 4321 if (!dir_ts) { 4322 continue; 4323 } 4324 op->args[i] = temp_arg(dir_ts); 4325 changes = true; 4326 4327 /* The output is now live and modified. */ 4328 arg_ts->state = 0; 4329 4330 /* Sync outputs upon their last write. */ 4331 if (NEED_SYNC_ARG(i)) { 4332 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4333 ? INDEX_op_st_i32 4334 : INDEX_op_st_i64); 4335 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4336 arg_ts->type, 3); 4337 4338 sop->args[0] = temp_arg(dir_ts); 4339 sop->args[1] = temp_arg(arg_ts->mem_base); 4340 sop->args[2] = arg_ts->mem_offset; 4341 4342 arg_ts->state = TS_MEM; 4343 } 4344 /* Drop outputs that are dead. */ 4345 if (IS_DEAD_ARG(i)) { 4346 arg_ts->state = TS_DEAD; 4347 } 4348 } 4349 } 4350 } 4351 4352 return changes; 4353 } 4354 4355 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4356 { 4357 intptr_t off; 4358 int size, align; 4359 4360 /* When allocating an object, look at the full type. */ 4361 size = tcg_type_size(ts->base_type); 4362 switch (ts->base_type) { 4363 case TCG_TYPE_I32: 4364 align = 4; 4365 break; 4366 case TCG_TYPE_I64: 4367 case TCG_TYPE_V64: 4368 align = 8; 4369 break; 4370 case TCG_TYPE_I128: 4371 case TCG_TYPE_V128: 4372 case TCG_TYPE_V256: 4373 /* 4374 * Note that we do not require aligned storage for V256, 4375 * and that we provide alignment for I128 to match V128, 4376 * even if that's above what the host ABI requires. 4377 */ 4378 align = 16; 4379 break; 4380 default: 4381 g_assert_not_reached(); 4382 } 4383 4384 /* 4385 * Assume the stack is sufficiently aligned. 4386 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4387 * and do not require 16 byte vector alignment. This seems slightly 4388 * easier than fully parameterizing the above switch statement. 4389 */ 4390 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4391 off = ROUND_UP(s->current_frame_offset, align); 4392 4393 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4394 if (off + size > s->frame_end) { 4395 tcg_raise_tb_overflow(s); 4396 } 4397 s->current_frame_offset = off + size; 4398 #if defined(__sparc__) 4399 off += TCG_TARGET_STACK_BIAS; 4400 #endif 4401 4402 /* If the object was subdivided, assign memory to all the parts. */ 4403 if (ts->base_type != ts->type) { 4404 int part_size = tcg_type_size(ts->type); 4405 int part_count = size / part_size; 4406 4407 /* 4408 * Each part is allocated sequentially in tcg_temp_new_internal. 4409 * Jump back to the first part by subtracting the current index. 4410 */ 4411 ts -= ts->temp_subindex; 4412 for (int i = 0; i < part_count; ++i) { 4413 ts[i].mem_offset = off + i * part_size; 4414 ts[i].mem_base = s->frame_temp; 4415 ts[i].mem_allocated = 1; 4416 } 4417 } else { 4418 ts->mem_offset = off; 4419 ts->mem_base = s->frame_temp; 4420 ts->mem_allocated = 1; 4421 } 4422 } 4423 4424 /* Assign @reg to @ts, and update reg_to_temp[]. 
*/
4425 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4426 {
4427 if (ts->val_type == TEMP_VAL_REG) {
4428 TCGReg old = ts->reg;
4429 tcg_debug_assert(s->reg_to_temp[old] == ts);
4430 if (old == reg) {
4431 return;
4432 }
4433 s->reg_to_temp[old] = NULL;
4434 }
4435 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4436 s->reg_to_temp[reg] = ts;
4437 ts->val_type = TEMP_VAL_REG;
4438 ts->reg = reg;
4439 }
4440 
4441 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4442 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4443 {
4444 tcg_debug_assert(type != TEMP_VAL_REG);
4445 if (ts->val_type == TEMP_VAL_REG) {
4446 TCGReg reg = ts->reg;
4447 tcg_debug_assert(s->reg_to_temp[reg] == ts);
4448 s->reg_to_temp[reg] = NULL;
4449 }
4450 ts->val_type = type;
4451 }
4452 
4453 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4454 
4455 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
4456 mark it free; otherwise mark it dead. */
4457 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4458 {
4459 TCGTempVal new_type;
4460 
4461 switch (ts->kind) {
4462 case TEMP_FIXED:
4463 return;
4464 case TEMP_GLOBAL:
4465 case TEMP_TB:
4466 new_type = TEMP_VAL_MEM;
4467 break;
4468 case TEMP_EBB:
4469 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4470 break;
4471 case TEMP_CONST:
4472 new_type = TEMP_VAL_CONST;
4473 break;
4474 default:
4475 g_assert_not_reached();
4476 }
4477 set_temp_val_nonreg(s, ts, new_type);
4478 }
4479 
4480 /* Mark a temporary as dead. */
4481 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4482 {
4483 temp_free_or_dead(s, ts, 1);
4484 }
4485 
4486 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4487 register needs to be allocated to store a constant. If 'free_or_dead'
4488 is non-zero, subsequently release the temporary; if it is positive, the
4489 temp is dead; if it is negative, the temp is free. */
4490 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4491 TCGRegSet preferred_regs, int free_or_dead)
4492 {
4493 if (!temp_readonly(ts) && !ts->mem_coherent) {
4494 if (!ts->mem_allocated) {
4495 temp_allocate_frame(s, ts);
4496 }
4497 switch (ts->val_type) {
4498 case TEMP_VAL_CONST:
4499 /* If we're going to free the temp immediately, then we won't
4500 require it later in a register, so attempt to store the
4501 constant to memory directly. */
4502 if (free_or_dead
4503 && tcg_out_sti(s, ts->type, ts->val,
4504 ts->mem_base->reg, ts->mem_offset)) {
4505 break;
4506 }
4507 temp_load(s, ts, tcg_target_available_regs[ts->type],
4508 allocated_regs, preferred_regs);
4509 /* fallthrough */
4510 
4511 case TEMP_VAL_REG:
4512 tcg_out_st(s, ts->type, ts->reg,
4513 ts->mem_base->reg, ts->mem_offset);
4514 break;
4515 
4516 case TEMP_VAL_MEM:
4517 break;
4518 
4519 case TEMP_VAL_DEAD:
4520 default:
4521 g_assert_not_reached();
4522 }
4523 ts->mem_coherent = 1;
4524 }
4525 if (free_or_dead) {
4526 temp_free_or_dead(s, ts, free_or_dead);
4527 }
4528 }
4529 
4530 /* free register 'reg' by spilling the corresponding temporary if necessary */
4531 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4532 {
4533 TCGTemp *ts = s->reg_to_temp[reg];
4534 if (ts != NULL) {
4535 temp_sync(s, ts, allocated_regs, 0, -1);
4536 }
4537 }
4538 
4539 /**
4540 * tcg_reg_alloc:
4541 * @required_regs: Set of registers in which we must allocate.
4542 * @allocated_regs: Set of registers which must be avoided. 4543 * @preferred_regs: Set of registers we should prefer. 4544 * @rev: True if we search the registers in "indirect" order. 4545 * 4546 * The allocated register must be in @required_regs & ~@allocated_regs, 4547 * but if we can put it in @preferred_regs we may save a move later. 4548 */ 4549 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4550 TCGRegSet allocated_regs, 4551 TCGRegSet preferred_regs, bool rev) 4552 { 4553 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4554 TCGRegSet reg_ct[2]; 4555 const int *order; 4556 4557 reg_ct[1] = required_regs & ~allocated_regs; 4558 tcg_debug_assert(reg_ct[1] != 0); 4559 reg_ct[0] = reg_ct[1] & preferred_regs; 4560 4561 /* Skip the preferred_regs option if it cannot be satisfied, 4562 or if the preference made no difference. */ 4563 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4564 4565 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4566 4567 /* Try free registers, preferences first. */ 4568 for (j = f; j < 2; j++) { 4569 TCGRegSet set = reg_ct[j]; 4570 4571 if (tcg_regset_single(set)) { 4572 /* One register in the set. */ 4573 TCGReg reg = tcg_regset_first(set); 4574 if (s->reg_to_temp[reg] == NULL) { 4575 return reg; 4576 } 4577 } else { 4578 for (i = 0; i < n; i++) { 4579 TCGReg reg = order[i]; 4580 if (s->reg_to_temp[reg] == NULL && 4581 tcg_regset_test_reg(set, reg)) { 4582 return reg; 4583 } 4584 } 4585 } 4586 } 4587 4588 /* We must spill something. */ 4589 for (j = f; j < 2; j++) { 4590 TCGRegSet set = reg_ct[j]; 4591 4592 if (tcg_regset_single(set)) { 4593 /* One register in the set. */ 4594 TCGReg reg = tcg_regset_first(set); 4595 tcg_reg_free(s, reg, allocated_regs); 4596 return reg; 4597 } else { 4598 for (i = 0; i < n; i++) { 4599 TCGReg reg = order[i]; 4600 if (tcg_regset_test_reg(set, reg)) { 4601 tcg_reg_free(s, reg, allocated_regs); 4602 return reg; 4603 } 4604 } 4605 } 4606 } 4607 4608 g_assert_not_reached(); 4609 } 4610 4611 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4612 TCGRegSet allocated_regs, 4613 TCGRegSet preferred_regs, bool rev) 4614 { 4615 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4616 TCGRegSet reg_ct[2]; 4617 const int *order; 4618 4619 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4620 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4621 tcg_debug_assert(reg_ct[1] != 0); 4622 reg_ct[0] = reg_ct[1] & preferred_regs; 4623 4624 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4625 4626 /* 4627 * Skip the preferred_regs option if it cannot be satisfied, 4628 * or if the preference made no difference. 4629 */ 4630 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4631 4632 /* 4633 * Minimize the number of flushes by looking for 2 free registers first, 4634 * then a single flush, then two flushes. 4635 */ 4636 for (fmin = 2; fmin >= 0; fmin--) { 4637 for (j = k; j < 2; j++) { 4638 TCGRegSet set = reg_ct[j]; 4639 4640 for (i = 0; i < n; i++) { 4641 TCGReg reg = order[i]; 4642 4643 if (tcg_regset_test_reg(set, reg)) { 4644 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4645 if (f >= fmin) { 4646 tcg_reg_free(s, reg, allocated_regs); 4647 tcg_reg_free(s, reg + 1, allocated_regs); 4648 return reg; 4649 } 4650 } 4651 } 4652 } 4653 } 4654 g_assert_not_reached(); 4655 } 4656 4657 /* Make sure the temporary is in a register. 
If needed, allocate the register
4658 from DESIRED while avoiding ALLOCATED. */
4659 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4660 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4661 {
4662 TCGReg reg;
4663 
4664 switch (ts->val_type) {
4665 case TEMP_VAL_REG:
4666 return;
4667 case TEMP_VAL_CONST:
4668 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4669 preferred_regs, ts->indirect_base);
4670 if (ts->type <= TCG_TYPE_I64) {
4671 tcg_out_movi(s, ts->type, reg, ts->val);
4672 } else {
4673 uint64_t val = ts->val;
4674 MemOp vece = MO_64;
4675 
4676 /*
4677 * Find the minimal vector element that matches the constant.
4678 * The targets will, in general, have to do this search anyway;
4679 * do it here generically.
4680 */
4681 if (val == dup_const(MO_8, val)) {
4682 vece = MO_8;
4683 } else if (val == dup_const(MO_16, val)) {
4684 vece = MO_16;
4685 } else if (val == dup_const(MO_32, val)) {
4686 vece = MO_32;
4687 }
4688 
4689 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4690 }
4691 ts->mem_coherent = 0;
4692 break;
4693 case TEMP_VAL_MEM:
4694 if (!ts->mem_allocated) {
4695 temp_allocate_frame(s, ts);
4696 }
4697 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4698 preferred_regs, ts->indirect_base);
4699 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4700 ts->mem_coherent = 1;
4701 break;
4702 case TEMP_VAL_DEAD:
4703 default:
4704 g_assert_not_reached();
4705 }
4706 set_temp_val_reg(s, ts, reg);
4707 }
4708 
4709 /* Save a temporary to memory. 'allocated_regs' is used in case a
4710 temporary register needs to be allocated to store a constant. */
4711 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4712 {
4713 /* The liveness analysis already ensures that globals are back
4714 in memory. Keep a tcg_debug_assert for safety. */
4715 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4716 }
4717 
4718 /* save globals to their canonical location and assume they can be
4719 modified by the following code. 'allocated_regs' is used in case a
4720 temporary register needs to be allocated to store a constant. */
4721 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4722 {
4723 int i, n;
4724 
4725 for (i = 0, n = s->nb_globals; i < n; i++) {
4726 temp_save(s, &s->temps[i], allocated_regs);
4727 }
4728 }
4729 
4730 /* sync globals to their canonical location and assume they can be
4731 read by the following code. 'allocated_regs' is used in case a
4732 temporary register needs to be allocated to store a constant. */
4733 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4734 {
4735 int i, n;
4736 
4737 for (i = 0, n = s->nb_globals; i < n; i++) {
4738 TCGTemp *ts = &s->temps[i];
4739 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4740 || ts->kind == TEMP_FIXED
4741 || ts->mem_coherent);
4742 }
4743 }
4744 
4745 /* at the end of a basic block, we assume all temporaries are dead and
4746 all globals are stored at their canonical location. */
4747 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4748 {
4749 int i;
4750 
4751 for (i = s->nb_globals; i < s->nb_temps; i++) {
4752 TCGTemp *ts = &s->temps[i];
4753 
4754 switch (ts->kind) {
4755 case TEMP_TB:
4756 temp_save(s, ts, allocated_regs);
4757 break;
4758 case TEMP_EBB:
4759 /* The liveness analysis already ensures that temps are dead.
4760 Keep a tcg_debug_assert for safety.
*/ 4761 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4762 break; 4763 case TEMP_CONST: 4764 /* Similarly, we should have freed any allocated register. */ 4765 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4766 break; 4767 default: 4768 g_assert_not_reached(); 4769 } 4770 } 4771 4772 save_globals(s, allocated_regs); 4773 } 4774 4775 /* 4776 * At a conditional branch, we assume all temporaries are dead unless 4777 * explicitly live-across-conditional-branch; all globals and local 4778 * temps are synced to their location. 4779 */ 4780 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4781 { 4782 sync_globals(s, allocated_regs); 4783 4784 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4785 TCGTemp *ts = &s->temps[i]; 4786 /* 4787 * The liveness analysis already ensures that temps are dead. 4788 * Keep tcg_debug_asserts for safety. 4789 */ 4790 switch (ts->kind) { 4791 case TEMP_TB: 4792 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4793 break; 4794 case TEMP_EBB: 4795 case TEMP_CONST: 4796 break; 4797 default: 4798 g_assert_not_reached(); 4799 } 4800 } 4801 } 4802 4803 /* 4804 * Specialized code generation for INDEX_op_mov_* with a constant. 4805 */ 4806 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4807 tcg_target_ulong val, TCGLifeData arg_life, 4808 TCGRegSet preferred_regs) 4809 { 4810 /* ENV should not be modified. */ 4811 tcg_debug_assert(!temp_readonly(ots)); 4812 4813 /* The movi is not explicitly generated here. */ 4814 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4815 ots->val = val; 4816 ots->mem_coherent = 0; 4817 if (NEED_SYNC_ARG(0)) { 4818 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4819 } else if (IS_DEAD_ARG(0)) { 4820 temp_dead(s, ots); 4821 } 4822 } 4823 4824 /* 4825 * Specialized code generation for INDEX_op_mov_*. 4826 */ 4827 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4828 { 4829 const TCGLifeData arg_life = op->life; 4830 TCGRegSet allocated_regs, preferred_regs; 4831 TCGTemp *ts, *ots; 4832 TCGType otype, itype; 4833 TCGReg oreg, ireg; 4834 4835 allocated_regs = s->reserved_regs; 4836 preferred_regs = output_pref(op, 0); 4837 ots = arg_temp(op->args[0]); 4838 ts = arg_temp(op->args[1]); 4839 4840 /* ENV should not be modified. */ 4841 tcg_debug_assert(!temp_readonly(ots)); 4842 4843 /* Note that otype != itype for no-op truncation. */ 4844 otype = ots->type; 4845 itype = ts->type; 4846 4847 if (ts->val_type == TEMP_VAL_CONST) { 4848 /* propagate constant or generate sti */ 4849 tcg_target_ulong val = ts->val; 4850 if (IS_DEAD_ARG(1)) { 4851 temp_dead(s, ts); 4852 } 4853 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4854 return; 4855 } 4856 4857 /* If the source value is in memory we're going to be forced 4858 to have it in a register in order to perform the copy. Copy 4859 the SOURCE value into its own register first, that way we 4860 don't have to reload SOURCE the next time it is used. */ 4861 if (ts->val_type == TEMP_VAL_MEM) { 4862 temp_load(s, ts, tcg_target_available_regs[itype], 4863 allocated_regs, preferred_regs); 4864 } 4865 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4866 ireg = ts->reg; 4867 4868 if (IS_DEAD_ARG(0)) { 4869 /* mov to a non-saved dead register makes no sense (even with 4870 liveness analysis disabled). 
*/
4871 tcg_debug_assert(NEED_SYNC_ARG(0));
4872 if (!ots->mem_allocated) {
4873 temp_allocate_frame(s, ots);
4874 }
4875 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4876 if (IS_DEAD_ARG(1)) {
4877 temp_dead(s, ts);
4878 }
4879 temp_dead(s, ots);
4880 return;
4881 }
4882 
4883 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4884 /*
4885 * The mov can be suppressed. Kill input first, so that it
4886 * is unlinked from reg_to_temp, then set the output to the
4887 * reg that we saved from the input.
4888 */
4889 temp_dead(s, ts);
4890 oreg = ireg;
4891 } else {
4892 if (ots->val_type == TEMP_VAL_REG) {
4893 oreg = ots->reg;
4894 } else {
4895 /* Make sure to not spill the input register during allocation. */
4896 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4897 allocated_regs | ((TCGRegSet)1 << ireg),
4898 preferred_regs, ots->indirect_base);
4899 }
4900 if (!tcg_out_mov(s, otype, oreg, ireg)) {
4901 /*
4902 * Cross register class move not supported.
4903 * Store the source register into the destination slot
4904 * and leave the destination temp as TEMP_VAL_MEM.
4905 */
4906 assert(!temp_readonly(ots));
4907 if (!ts->mem_allocated) {
4908 temp_allocate_frame(s, ots);
4909 }
4910 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4911 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4912 ots->mem_coherent = 1;
4913 return;
4914 }
4915 }
4916 set_temp_val_reg(s, ots, oreg);
4917 ots->mem_coherent = 0;
4918 
4919 if (NEED_SYNC_ARG(0)) {
4920 temp_sync(s, ots, allocated_regs, 0, 0);
4921 }
4922 }
4923 
4924 /*
4925 * Specialized code generation for INDEX_op_dup_vec.
4926 */
4927 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4928 {
4929 const TCGLifeData arg_life = op->life;
4930 TCGRegSet dup_out_regs, dup_in_regs;
4931 const TCGArgConstraint *dup_args_ct;
4932 TCGTemp *its, *ots;
4933 TCGType itype, vtype;
4934 unsigned vece;
4935 int lowpart_ofs;
4936 bool ok;
4937 
4938 ots = arg_temp(op->args[0]);
4939 its = arg_temp(op->args[1]);
4940 
4941 /* ENV should not be modified. */
4942 tcg_debug_assert(!temp_readonly(ots));
4943 
4944 itype = its->type;
4945 vece = TCGOP_VECE(op);
4946 vtype = TCGOP_TYPE(op);
4947 
4948 if (its->val_type == TEMP_VAL_CONST) {
4949 /* Propagate constant via movi -> dupi. */
4950 tcg_target_ulong val = its->val;
4951 if (IS_DEAD_ARG(1)) {
4952 temp_dead(s, its);
4953 }
4954 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4955 return;
4956 }
4957 
4958 dup_args_ct = opcode_args_ct(op);
4959 dup_out_regs = dup_args_ct[0].regs;
4960 dup_in_regs = dup_args_ct[1].regs;
4961 
4962 /* Allocate the output register now. */
4963 if (ots->val_type != TEMP_VAL_REG) {
4964 TCGRegSet allocated_regs = s->reserved_regs;
4965 TCGReg oreg;
4966 
4967 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4968 /* Make sure to not spill the input register. */
4969 tcg_regset_set_reg(allocated_regs, its->reg);
4970 }
4971 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4972 output_pref(op, 0), ots->indirect_base);
4973 set_temp_val_reg(s, ots, oreg);
4974 }
4975 
4976 switch (its->val_type) {
4977 case TEMP_VAL_REG:
4978 /*
4979 * The dup constraints must be broad, covering all possible VECE.
4980 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4981 * to fail, indicating that extra moves are required for that case.
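 * (Hedged illustration, an assumption about a typical backend rather
 * than a statement from this file: a backend that can broadcast from
 * a general register only for some element sizes still advertises a
 * broad input constraint, returns false here for the unsupported
 * VECE, and relies on the move/load fallback below.)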
4982 */ 4983 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4984 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4985 goto done; 4986 } 4987 /* Try again from memory or a vector input register. */ 4988 } 4989 if (!its->mem_coherent) { 4990 /* 4991 * The input register is not synced, and so an extra store 4992 * would be required to use memory. Attempt an integer-vector 4993 * register move first. We do not have a TCGRegSet for this. 4994 */ 4995 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4996 break; 4997 } 4998 /* Sync the temp back to its slot and load from there. */ 4999 temp_sync(s, its, s->reserved_regs, 0, 0); 5000 } 5001 /* fall through */ 5002 5003 case TEMP_VAL_MEM: 5004 lowpart_ofs = 0; 5005 if (HOST_BIG_ENDIAN) { 5006 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5007 } 5008 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5009 its->mem_offset + lowpart_ofs)) { 5010 goto done; 5011 } 5012 /* Load the input into the destination vector register. */ 5013 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5014 break; 5015 5016 default: 5017 g_assert_not_reached(); 5018 } 5019 5020 /* We now have a vector input register, so dup must succeed. */ 5021 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5022 tcg_debug_assert(ok); 5023 5024 done: 5025 ots->mem_coherent = 0; 5026 if (IS_DEAD_ARG(1)) { 5027 temp_dead(s, its); 5028 } 5029 if (NEED_SYNC_ARG(0)) { 5030 temp_sync(s, ots, s->reserved_regs, 0, 0); 5031 } 5032 if (IS_DEAD_ARG(0)) { 5033 temp_dead(s, ots); 5034 } 5035 } 5036 5037 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5038 { 5039 const TCGLifeData arg_life = op->life; 5040 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5041 TCGRegSet i_allocated_regs; 5042 TCGRegSet o_allocated_regs; 5043 int i, k, nb_iargs, nb_oargs; 5044 TCGReg reg; 5045 TCGArg arg; 5046 const TCGArgConstraint *args_ct; 5047 const TCGArgConstraint *arg_ct; 5048 TCGTemp *ts; 5049 TCGArg new_args[TCG_MAX_OP_ARGS]; 5050 int const_args[TCG_MAX_OP_ARGS]; 5051 TCGCond op_cond; 5052 5053 nb_oargs = def->nb_oargs; 5054 nb_iargs = def->nb_iargs; 5055 5056 /* copy constants */ 5057 memcpy(new_args + nb_oargs + nb_iargs, 5058 op->args + nb_oargs + nb_iargs, 5059 sizeof(TCGArg) * def->nb_cargs); 5060 5061 i_allocated_regs = s->reserved_regs; 5062 o_allocated_regs = s->reserved_regs; 5063 5064 switch (op->opc) { 5065 case INDEX_op_brcond_i32: 5066 case INDEX_op_brcond_i64: 5067 op_cond = op->args[2]; 5068 break; 5069 case INDEX_op_setcond_i32: 5070 case INDEX_op_setcond_i64: 5071 case INDEX_op_negsetcond_i32: 5072 case INDEX_op_negsetcond_i64: 5073 case INDEX_op_cmp_vec: 5074 op_cond = op->args[3]; 5075 break; 5076 case INDEX_op_brcond2_i32: 5077 op_cond = op->args[4]; 5078 break; 5079 case INDEX_op_movcond_i32: 5080 case INDEX_op_movcond_i64: 5081 case INDEX_op_setcond2_i32: 5082 case INDEX_op_cmpsel_vec: 5083 op_cond = op->args[5]; 5084 break; 5085 default: 5086 /* No condition within opcode. 
*/ 5087 op_cond = TCG_COND_ALWAYS; 5088 break; 5089 } 5090 5091 args_ct = opcode_args_ct(op); 5092 5093 /* satisfy input constraints */ 5094 for (k = 0; k < nb_iargs; k++) { 5095 TCGRegSet i_preferred_regs, i_required_regs; 5096 bool allocate_new_reg, copyto_new_reg; 5097 TCGTemp *ts2; 5098 int i1, i2; 5099 5100 i = args_ct[nb_oargs + k].sort_index; 5101 arg = op->args[i]; 5102 arg_ct = &args_ct[i]; 5103 ts = arg_temp(arg); 5104 5105 if (ts->val_type == TEMP_VAL_CONST) { 5106 #ifdef TCG_REG_ZERO 5107 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5108 /* Hardware zero register: indicate register via non-const. */ 5109 const_args[i] = 0; 5110 new_args[i] = TCG_REG_ZERO; 5111 continue; 5112 } 5113 #endif 5114 5115 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5116 op_cond, TCGOP_VECE(op))) { 5117 /* constant is OK for instruction */ 5118 const_args[i] = 1; 5119 new_args[i] = ts->val; 5120 continue; 5121 } 5122 } 5123 5124 reg = ts->reg; 5125 i_preferred_regs = 0; 5126 i_required_regs = arg_ct->regs; 5127 allocate_new_reg = false; 5128 copyto_new_reg = false; 5129 5130 switch (arg_ct->pair) { 5131 case 0: /* not paired */ 5132 if (arg_ct->ialias) { 5133 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5134 5135 /* 5136 * If the input is readonly, then it cannot also be an 5137 * output and aliased to itself. If the input is not 5138 * dead after the instruction, we must allocate a new 5139 * register and move it. 5140 */ 5141 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5142 || args_ct[arg_ct->alias_index].newreg) { 5143 allocate_new_reg = true; 5144 } else if (ts->val_type == TEMP_VAL_REG) { 5145 /* 5146 * Check if the current register has already been 5147 * allocated for another input. 5148 */ 5149 allocate_new_reg = 5150 tcg_regset_test_reg(i_allocated_regs, reg); 5151 } 5152 } 5153 if (!allocate_new_reg) { 5154 temp_load(s, ts, i_required_regs, i_allocated_regs, 5155 i_preferred_regs); 5156 reg = ts->reg; 5157 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5158 } 5159 if (allocate_new_reg) { 5160 /* 5161 * Allocate a new register matching the constraint 5162 * and move the temporary register into it. 5163 */ 5164 temp_load(s, ts, tcg_target_available_regs[ts->type], 5165 i_allocated_regs, 0); 5166 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5167 i_preferred_regs, ts->indirect_base); 5168 copyto_new_reg = true; 5169 } 5170 break; 5171 5172 case 1: 5173 /* First of an input pair; if i1 == i2, the second is an output. */ 5174 i1 = i; 5175 i2 = arg_ct->pair_index; 5176 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5177 5178 /* 5179 * It is easier to default to allocating a new pair 5180 * and to identify a few cases where it's not required. 5181 */ 5182 if (arg_ct->ialias) { 5183 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5184 if (IS_DEAD_ARG(i1) && 5185 IS_DEAD_ARG(i2) && 5186 !temp_readonly(ts) && 5187 ts->val_type == TEMP_VAL_REG && 5188 ts->reg < TCG_TARGET_NB_REGS - 1 && 5189 tcg_regset_test_reg(i_required_regs, reg) && 5190 !tcg_regset_test_reg(i_allocated_regs, reg) && 5191 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5192 (ts2 5193 ? ts2->val_type == TEMP_VAL_REG && 5194 ts2->reg == reg + 1 && 5195 !temp_readonly(ts2) 5196 : s->reg_to_temp[reg + 1] == NULL)) { 5197 break; 5198 } 5199 } else { 5200 /* Without aliasing, the pair must also be an input. 
*/ 5201 tcg_debug_assert(ts2); 5202 if (ts->val_type == TEMP_VAL_REG && 5203 ts2->val_type == TEMP_VAL_REG && 5204 ts2->reg == reg + 1 && 5205 tcg_regset_test_reg(i_required_regs, reg)) { 5206 break; 5207 } 5208 } 5209 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5210 0, ts->indirect_base); 5211 goto do_pair; 5212 5213 case 2: /* pair second */ 5214 reg = new_args[arg_ct->pair_index] + 1; 5215 goto do_pair; 5216 5217 case 3: /* ialias with second output, no first input */ 5218 tcg_debug_assert(arg_ct->ialias); 5219 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5220 5221 if (IS_DEAD_ARG(i) && 5222 !temp_readonly(ts) && 5223 ts->val_type == TEMP_VAL_REG && 5224 reg > 0 && 5225 s->reg_to_temp[reg - 1] == NULL && 5226 tcg_regset_test_reg(i_required_regs, reg) && 5227 !tcg_regset_test_reg(i_allocated_regs, reg) && 5228 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5229 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5230 break; 5231 } 5232 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5233 i_allocated_regs, 0, 5234 ts->indirect_base); 5235 tcg_regset_set_reg(i_allocated_regs, reg); 5236 reg += 1; 5237 goto do_pair; 5238 5239 do_pair: 5240 /* 5241 * If an aliased input is not dead after the instruction, 5242 * we must allocate a new register and move it. 5243 */ 5244 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5245 TCGRegSet t_allocated_regs = i_allocated_regs; 5246 5247 /* 5248 * Because of the alias, and the continued life, make sure 5249 * that the temp is somewhere *other* than the reg pair, 5250 * and we get a copy in reg. 5251 */ 5252 tcg_regset_set_reg(t_allocated_regs, reg); 5253 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5254 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5255 /* If ts was already in reg, copy it somewhere else. */ 5256 TCGReg nr; 5257 bool ok; 5258 5259 tcg_debug_assert(ts->kind != TEMP_FIXED); 5260 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5261 t_allocated_regs, 0, ts->indirect_base); 5262 ok = tcg_out_mov(s, ts->type, nr, reg); 5263 tcg_debug_assert(ok); 5264 5265 set_temp_val_reg(s, ts, nr); 5266 } else { 5267 temp_load(s, ts, tcg_target_available_regs[ts->type], 5268 t_allocated_regs, 0); 5269 copyto_new_reg = true; 5270 } 5271 } else { 5272 /* Preferably allocate to reg, otherwise copy. */ 5273 i_required_regs = (TCGRegSet)1 << reg; 5274 temp_load(s, ts, i_required_regs, i_allocated_regs, 5275 i_preferred_regs); 5276 copyto_new_reg = ts->reg != reg; 5277 } 5278 break; 5279 5280 default: 5281 g_assert_not_reached(); 5282 } 5283 5284 if (copyto_new_reg) { 5285 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5286 /* 5287 * Cross register class move not supported. Sync the 5288 * temp back to its slot and load from there. 5289 */ 5290 temp_sync(s, ts, i_allocated_regs, 0, 0); 5291 tcg_out_ld(s, ts->type, reg, 5292 ts->mem_base->reg, ts->mem_offset); 5293 } 5294 } 5295 new_args[i] = reg; 5296 const_args[i] = 0; 5297 tcg_regset_set_reg(i_allocated_regs, reg); 5298 } 5299 5300 /* mark dead temporaries and free the associated registers */ 5301 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5302 if (IS_DEAD_ARG(i)) { 5303 temp_dead(s, arg_temp(op->args[i])); 5304 } 5305 } 5306 5307 if (def->flags & TCG_OPF_COND_BRANCH) { 5308 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5309 } else if (def->flags & TCG_OPF_BB_END) { 5310 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5311 } else { 5312 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5313 /* XXX: permit generic clobber register list ? 
*/ 5314 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5315 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5316 tcg_reg_free(s, i, i_allocated_regs); 5317 } 5318 } 5319 } 5320 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5321 /* sync globals if the op has side effects and might trigger 5322 an exception. */ 5323 sync_globals(s, i_allocated_regs); 5324 } 5325 5326 /* satisfy the output constraints */ 5327 for (k = 0; k < nb_oargs; k++) { 5328 i = args_ct[k].sort_index; 5329 arg = op->args[i]; 5330 arg_ct = &args_ct[i]; 5331 ts = arg_temp(arg); 5332 5333 /* ENV should not be modified. */ 5334 tcg_debug_assert(!temp_readonly(ts)); 5335 5336 switch (arg_ct->pair) { 5337 case 0: /* not paired */ 5338 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5339 reg = new_args[arg_ct->alias_index]; 5340 } else if (arg_ct->newreg) { 5341 reg = tcg_reg_alloc(s, arg_ct->regs, 5342 i_allocated_regs | o_allocated_regs, 5343 output_pref(op, k), ts->indirect_base); 5344 } else { 5345 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5346 output_pref(op, k), ts->indirect_base); 5347 } 5348 break; 5349 5350 case 1: /* first of pair */ 5351 if (arg_ct->oalias) { 5352 reg = new_args[arg_ct->alias_index]; 5353 } else if (arg_ct->newreg) { 5354 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5355 i_allocated_regs | o_allocated_regs, 5356 output_pref(op, k), 5357 ts->indirect_base); 5358 } else { 5359 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5360 output_pref(op, k), 5361 ts->indirect_base); 5362 } 5363 break; 5364 5365 case 2: /* second of pair */ 5366 if (arg_ct->oalias) { 5367 reg = new_args[arg_ct->alias_index]; 5368 } else { 5369 reg = new_args[arg_ct->pair_index] + 1; 5370 } 5371 break; 5372 5373 case 3: /* first of pair, aliasing with a second input */ 5374 tcg_debug_assert(!arg_ct->newreg); 5375 reg = new_args[arg_ct->pair_index] - 1; 5376 break; 5377 5378 default: 5379 g_assert_not_reached(); 5380 } 5381 tcg_regset_set_reg(o_allocated_regs, reg); 5382 set_temp_val_reg(s, ts, reg); 5383 ts->mem_coherent = 0; 5384 new_args[i] = reg; 5385 } 5386 } 5387 5388 /* emit instruction */ 5389 TCGType type = TCGOP_TYPE(op); 5390 switch (op->opc) { 5391 case INDEX_op_ext_i32_i64: 5392 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5393 break; 5394 case INDEX_op_extu_i32_i64: 5395 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5396 break; 5397 case INDEX_op_extrl_i64_i32: 5398 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5399 break; 5400 5401 case INDEX_op_add: 5402 case INDEX_op_and: 5403 case INDEX_op_andc: 5404 case INDEX_op_clz: 5405 case INDEX_op_divs: 5406 case INDEX_op_divu: 5407 case INDEX_op_eqv: 5408 case INDEX_op_mul: 5409 case INDEX_op_mulsh: 5410 case INDEX_op_muluh: 5411 case INDEX_op_nand: 5412 case INDEX_op_nor: 5413 case INDEX_op_or: 5414 case INDEX_op_orc: 5415 case INDEX_op_rems: 5416 case INDEX_op_remu: 5417 case INDEX_op_rotl: 5418 case INDEX_op_rotr: 5419 case INDEX_op_sar: 5420 case INDEX_op_shl: 5421 case INDEX_op_shr: 5422 case INDEX_op_xor: 5423 { 5424 const TCGOutOpBinary *out = 5425 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5426 5427 /* Constants should never appear in the first source operand. 
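   (Aside, an assumption rather than original source text: commutative
   operands are canonicalized earlier so that any constant ends up as
   the second source, hence only an out_rri form, with the constant as
   the second source, is needed below.)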
*/ 5428 tcg_debug_assert(!const_args[1]); 5429 if (const_args[2]) { 5430 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5431 } else { 5432 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5433 } 5434 } 5435 break; 5436 5437 case INDEX_op_sub: 5438 { 5439 const TCGOutOpSubtract *out = &outop_sub; 5440 5441 /* 5442 * Constants should never appear in the second source operand. 5443 * These are folded to add with negative constant. 5444 */ 5445 tcg_debug_assert(!const_args[2]); 5446 if (const_args[1]) { 5447 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5448 } else { 5449 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5450 } 5451 } 5452 break; 5453 5454 case INDEX_op_neg: 5455 case INDEX_op_not: 5456 { 5457 const TCGOutOpUnary *out = 5458 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5459 5460 /* Constants should have been folded. */ 5461 tcg_debug_assert(!const_args[1]); 5462 out->out_rr(s, type, new_args[0], new_args[1]); 5463 } 5464 break; 5465 5466 case INDEX_op_divs2: 5467 case INDEX_op_divu2: 5468 { 5469 const TCGOutOpDivRem *out = 5470 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5471 5472 /* Only used by x86 and s390x, which use matching constraints. */ 5473 tcg_debug_assert(new_args[0] == new_args[2]); 5474 tcg_debug_assert(new_args[1] == new_args[3]); 5475 tcg_debug_assert(!const_args[4]); 5476 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5477 } 5478 break; 5479 5480 default: 5481 if (def->flags & TCG_OPF_VECTOR) { 5482 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5483 TCGOP_VECE(op), new_args, const_args); 5484 } else { 5485 tcg_out_op(s, op->opc, type, new_args, const_args); 5486 } 5487 break; 5488 } 5489 5490 /* move the outputs in the correct register if needed */ 5491 for(i = 0; i < nb_oargs; i++) { 5492 ts = arg_temp(op->args[i]); 5493 5494 /* ENV should not be modified. */ 5495 tcg_debug_assert(!temp_readonly(ts)); 5496 5497 if (NEED_SYNC_ARG(i)) { 5498 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5499 } else if (IS_DEAD_ARG(i)) { 5500 temp_dead(s, ts); 5501 } 5502 } 5503 } 5504 5505 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5506 { 5507 const TCGLifeData arg_life = op->life; 5508 TCGTemp *ots, *itsl, *itsh; 5509 TCGType vtype = TCGOP_TYPE(op); 5510 5511 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5512 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5513 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5514 5515 ots = arg_temp(op->args[0]); 5516 itsl = arg_temp(op->args[1]); 5517 itsh = arg_temp(op->args[2]); 5518 5519 /* ENV should not be modified. */ 5520 tcg_debug_assert(!temp_readonly(ots)); 5521 5522 /* Allocate the output register now. */ 5523 if (ots->val_type != TEMP_VAL_REG) { 5524 TCGRegSet allocated_regs = s->reserved_regs; 5525 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5526 TCGReg oreg; 5527 5528 /* Make sure to not spill the input registers. */ 5529 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5530 tcg_regset_set_reg(allocated_regs, itsl->reg); 5531 } 5532 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5533 tcg_regset_set_reg(allocated_regs, itsh->reg); 5534 } 5535 5536 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5537 output_pref(op, 0), ots->indirect_base); 5538 set_temp_val_reg(s, ots, oreg); 5539 } 5540 5541 /* Promote dup2 of immediates to dupi_vec. 
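   For example (illustrative values, not from the original source):
   itsl->val == 0x01010101 and itsh->val == 0x01010101 combine to
   0x0101010101010101, which equals dup_const(MO_8, 0x01), so the
   dupi below is emitted with vece == MO_8.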
*/ 5542 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5543 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5544 MemOp vece = MO_64; 5545 5546 if (val == dup_const(MO_8, val)) { 5547 vece = MO_8; 5548 } else if (val == dup_const(MO_16, val)) { 5549 vece = MO_16; 5550 } else if (val == dup_const(MO_32, val)) { 5551 vece = MO_32; 5552 } 5553 5554 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5555 goto done; 5556 } 5557 5558 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5559 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5560 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5561 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5562 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5563 5564 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5565 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5566 5567 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5568 its->mem_base->reg, its->mem_offset)) { 5569 goto done; 5570 } 5571 } 5572 5573 /* Fall back to generic expansion. */ 5574 return false; 5575 5576 done: 5577 ots->mem_coherent = 0; 5578 if (IS_DEAD_ARG(1)) { 5579 temp_dead(s, itsl); 5580 } 5581 if (IS_DEAD_ARG(2)) { 5582 temp_dead(s, itsh); 5583 } 5584 if (NEED_SYNC_ARG(0)) { 5585 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5586 } else if (IS_DEAD_ARG(0)) { 5587 temp_dead(s, ots); 5588 } 5589 return true; 5590 } 5591 5592 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5593 TCGRegSet allocated_regs) 5594 { 5595 if (ts->val_type == TEMP_VAL_REG) { 5596 if (ts->reg != reg) { 5597 tcg_reg_free(s, reg, allocated_regs); 5598 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5599 /* 5600 * Cross register class move not supported. Sync the 5601 * temp back to its slot and load from there. 5602 */ 5603 temp_sync(s, ts, allocated_regs, 0, 0); 5604 tcg_out_ld(s, ts->type, reg, 5605 ts->mem_base->reg, ts->mem_offset); 5606 } 5607 } 5608 } else { 5609 TCGRegSet arg_set = 0; 5610 5611 tcg_reg_free(s, reg, allocated_regs); 5612 tcg_regset_set_reg(arg_set, reg); 5613 temp_load(s, ts, arg_set, allocated_regs, 0); 5614 } 5615 } 5616 5617 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5618 TCGRegSet allocated_regs) 5619 { 5620 /* 5621 * When the destination is on the stack, load up the temp and store. 5622 * If there are many call-saved registers, the temp might live to 5623 * see another use; otherwise it'll be discarded. 
5624 */ 5625 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5626 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5627 arg_slot_stk_ofs(arg_slot)); 5628 } 5629 5630 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5631 TCGTemp *ts, TCGRegSet *allocated_regs) 5632 { 5633 if (arg_slot_reg_p(l->arg_slot)) { 5634 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5635 load_arg_reg(s, reg, ts, *allocated_regs); 5636 tcg_regset_set_reg(*allocated_regs, reg); 5637 } else { 5638 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5639 } 5640 } 5641 5642 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5643 intptr_t ref_off, TCGRegSet *allocated_regs) 5644 { 5645 TCGReg reg; 5646 5647 if (arg_slot_reg_p(arg_slot)) { 5648 reg = tcg_target_call_iarg_regs[arg_slot]; 5649 tcg_reg_free(s, reg, *allocated_regs); 5650 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5651 tcg_regset_set_reg(*allocated_regs, reg); 5652 } else { 5653 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5654 *allocated_regs, 0, false); 5655 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5656 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5657 arg_slot_stk_ofs(arg_slot)); 5658 } 5659 } 5660 5661 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5662 { 5663 const int nb_oargs = TCGOP_CALLO(op); 5664 const int nb_iargs = TCGOP_CALLI(op); 5665 const TCGLifeData arg_life = op->life; 5666 const TCGHelperInfo *info = tcg_call_info(op); 5667 TCGRegSet allocated_regs = s->reserved_regs; 5668 int i; 5669 5670 /* 5671 * Move inputs into place in reverse order, 5672 * so that we place stacked arguments first. 5673 */ 5674 for (i = nb_iargs - 1; i >= 0; --i) { 5675 const TCGCallArgumentLoc *loc = &info->in[i]; 5676 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5677 5678 switch (loc->kind) { 5679 case TCG_CALL_ARG_NORMAL: 5680 case TCG_CALL_ARG_EXTEND_U: 5681 case TCG_CALL_ARG_EXTEND_S: 5682 load_arg_normal(s, loc, ts, &allocated_regs); 5683 break; 5684 case TCG_CALL_ARG_BY_REF: 5685 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5686 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5687 arg_slot_stk_ofs(loc->ref_slot), 5688 &allocated_regs); 5689 break; 5690 case TCG_CALL_ARG_BY_REF_N: 5691 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5692 break; 5693 default: 5694 g_assert_not_reached(); 5695 } 5696 } 5697 5698 /* Mark dead temporaries and free the associated registers. */ 5699 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5700 if (IS_DEAD_ARG(i)) { 5701 temp_dead(s, arg_temp(op->args[i])); 5702 } 5703 } 5704 5705 /* Clobber call registers. */ 5706 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5707 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5708 tcg_reg_free(s, i, allocated_regs); 5709 } 5710 } 5711 5712 /* 5713 * Save globals if they might be written by the helper, 5714 * sync them if they might be read. 5715 */ 5716 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5717 /* Nothing to do */ 5718 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5719 sync_globals(s, allocated_regs); 5720 } else { 5721 save_globals(s, allocated_regs); 5722 } 5723 5724 /* 5725 * If the ABI passes a pointer to the returned struct as the first 5726 * argument, load that now. Pass a pointer to the output home slot. 
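 * (Illustrative, assuming a helper returning an Int128 by reference:
 * op->args[0] names the 128-bit output temp; its frame slot is
 * allocated on demand just below and its address is loaded into
 * argument slot 0.)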
5727 */ 5728 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5729 TCGTemp *ts = arg_temp(op->args[0]); 5730 5731 if (!ts->mem_allocated) { 5732 temp_allocate_frame(s, ts); 5733 } 5734 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5735 } 5736 5737 tcg_out_call(s, tcg_call_func(op), info); 5738 5739 /* Assign output registers and emit moves if needed. */ 5740 switch (info->out_kind) { 5741 case TCG_CALL_RET_NORMAL: 5742 for (i = 0; i < nb_oargs; i++) { 5743 TCGTemp *ts = arg_temp(op->args[i]); 5744 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5745 5746 /* ENV should not be modified. */ 5747 tcg_debug_assert(!temp_readonly(ts)); 5748 5749 set_temp_val_reg(s, ts, reg); 5750 ts->mem_coherent = 0; 5751 } 5752 break; 5753 5754 case TCG_CALL_RET_BY_VEC: 5755 { 5756 TCGTemp *ts = arg_temp(op->args[0]); 5757 5758 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5759 tcg_debug_assert(ts->temp_subindex == 0); 5760 if (!ts->mem_allocated) { 5761 temp_allocate_frame(s, ts); 5762 } 5763 tcg_out_st(s, TCG_TYPE_V128, 5764 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5765 ts->mem_base->reg, ts->mem_offset); 5766 } 5767 /* fall through to mark all parts in memory */ 5768 5769 case TCG_CALL_RET_BY_REF: 5770 /* The callee has performed a write through the reference. */ 5771 for (i = 0; i < nb_oargs; i++) { 5772 TCGTemp *ts = arg_temp(op->args[i]); 5773 ts->val_type = TEMP_VAL_MEM; 5774 } 5775 break; 5776 5777 default: 5778 g_assert_not_reached(); 5779 } 5780 5781 /* Flush or discard output registers as needed. */ 5782 for (i = 0; i < nb_oargs; i++) { 5783 TCGTemp *ts = arg_temp(op->args[i]); 5784 if (NEED_SYNC_ARG(i)) { 5785 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5786 } else if (IS_DEAD_ARG(i)) { 5787 temp_dead(s, ts); 5788 } 5789 } 5790 } 5791 5792 /** 5793 * atom_and_align_for_opc: 5794 * @s: tcg context 5795 * @opc: memory operation code 5796 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5797 * @allow_two_ops: true if we are prepared to issue two operations 5798 * 5799 * Return the alignment and atomicity to use for the inline fast path 5800 * for the given memory operation. The alignment may be larger than 5801 * that specified in @opc, and the correct alignment will be diagnosed 5802 * by the slow path helper. 5803 * 5804 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5805 * and issue two loads or stores for subalignment. 5806 */ 5807 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5808 MemOp host_atom, bool allow_two_ops) 5809 { 5810 MemOp align = memop_alignment_bits(opc); 5811 MemOp size = opc & MO_SIZE; 5812 MemOp half = size ? size - 1 : 0; 5813 MemOp atom = opc & MO_ATOM_MASK; 5814 MemOp atmax; 5815 5816 switch (atom) { 5817 case MO_ATOM_NONE: 5818 /* The operation requires no specific atomicity. */ 5819 atmax = MO_8; 5820 break; 5821 5822 case MO_ATOM_IFALIGN: 5823 atmax = size; 5824 break; 5825 5826 case MO_ATOM_IFALIGN_PAIR: 5827 atmax = half; 5828 break; 5829 5830 case MO_ATOM_WITHIN16: 5831 atmax = size; 5832 if (size == MO_128) { 5833 /* Misalignment implies !within16, and therefore no atomicity. */ 5834 } else if (host_atom != MO_ATOM_WITHIN16) { 5835 /* The host does not implement within16, so require alignment. */ 5836 align = MAX(align, size); 5837 } 5838 break; 5839 5840 case MO_ATOM_WITHIN16_PAIR: 5841 atmax = size; 5842 /* 5843 * Misalignment implies !within16, and therefore half atomicity. 
5844 * Any host prepared for two operations can implement this with 5845 * half alignment. 5846 */ 5847 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5848 align = MAX(align, half); 5849 } 5850 break; 5851 5852 case MO_ATOM_SUBALIGN: 5853 atmax = size; 5854 if (host_atom != MO_ATOM_SUBALIGN) { 5855 /* If unaligned but not odd, there are subobjects up to half. */ 5856 if (allow_two_ops) { 5857 align = MAX(align, half); 5858 } else { 5859 align = MAX(align, size); 5860 } 5861 } 5862 break; 5863 5864 default: 5865 g_assert_not_reached(); 5866 } 5867 5868 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5869 } 5870 5871 /* 5872 * Similarly for qemu_ld/st slow path helpers. 5873 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5874 * using only the provided backend tcg_out_* functions. 5875 */ 5876 5877 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5878 { 5879 int ofs = arg_slot_stk_ofs(slot); 5880 5881 /* 5882 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5883 * require extension to uint64_t, adjust the address for uint32_t. 5884 */ 5885 if (HOST_BIG_ENDIAN && 5886 TCG_TARGET_REG_BITS == 64 && 5887 type == TCG_TYPE_I32) { 5888 ofs += 4; 5889 } 5890 return ofs; 5891 } 5892 5893 static void tcg_out_helper_load_slots(TCGContext *s, 5894 unsigned nmov, TCGMovExtend *mov, 5895 const TCGLdstHelperParam *parm) 5896 { 5897 unsigned i; 5898 TCGReg dst3; 5899 5900 /* 5901 * Start from the end, storing to the stack first. 5902 * This frees those registers, so we need not consider overlap. 5903 */ 5904 for (i = nmov; i-- > 0; ) { 5905 unsigned slot = mov[i].dst; 5906 5907 if (arg_slot_reg_p(slot)) { 5908 goto found_reg; 5909 } 5910 5911 TCGReg src = mov[i].src; 5912 TCGType dst_type = mov[i].dst_type; 5913 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5914 5915 /* The argument is going onto the stack; extend into scratch. */ 5916 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5917 tcg_debug_assert(parm->ntmp != 0); 5918 mov[i].dst = src = parm->tmp[0]; 5919 tcg_out_movext1(s, &mov[i]); 5920 } 5921 5922 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5923 tcg_out_helper_stk_ofs(dst_type, slot)); 5924 } 5925 return; 5926 5927 found_reg: 5928 /* 5929 * The remaining arguments are in registers. 5930 * Convert slot numbers to argument registers. 5931 */ 5932 nmov = i + 1; 5933 for (i = 0; i < nmov; ++i) { 5934 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5935 } 5936 5937 switch (nmov) { 5938 case 4: 5939 /* The backend must have provided enough temps for the worst case. */ 5940 tcg_debug_assert(parm->ntmp >= 2); 5941 5942 dst3 = mov[3].dst; 5943 for (unsigned j = 0; j < 3; ++j) { 5944 if (dst3 == mov[j].src) { 5945 /* 5946 * Conflict. Copy the source to a temporary, perform the 5947 * remaining moves, then the extension from our scratch 5948 * on the way out. 5949 */ 5950 TCGReg scratch = parm->tmp[1]; 5951 5952 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5953 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5954 tcg_out_movext1_new_src(s, &mov[3], scratch); 5955 break; 5956 } 5957 } 5958 5959 /* No conflicts: perform this move and continue. */ 5960 tcg_out_movext1(s, &mov[3]); 5961 /* fall through */ 5962 5963 case 3: 5964 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5965 parm->ntmp ? parm->tmp[0] : -1); 5966 break; 5967 case 2: 5968 tcg_out_movext2(s, mov, mov + 1, 5969 parm->ntmp ? 
parm->tmp[0] : -1); 5970 break; 5971 case 1: 5972 tcg_out_movext1(s, mov); 5973 break; 5974 default: 5975 g_assert_not_reached(); 5976 } 5977 } 5978 5979 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5980 TCGType type, tcg_target_long imm, 5981 const TCGLdstHelperParam *parm) 5982 { 5983 if (arg_slot_reg_p(slot)) { 5984 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5985 } else { 5986 int ofs = tcg_out_helper_stk_ofs(type, slot); 5987 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5988 tcg_debug_assert(parm->ntmp != 0); 5989 tcg_out_movi(s, type, parm->tmp[0], imm); 5990 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5991 } 5992 } 5993 } 5994 5995 static void tcg_out_helper_load_common_args(TCGContext *s, 5996 const TCGLabelQemuLdst *ldst, 5997 const TCGLdstHelperParam *parm, 5998 const TCGHelperInfo *info, 5999 unsigned next_arg) 6000 { 6001 TCGMovExtend ptr_mov = { 6002 .dst_type = TCG_TYPE_PTR, 6003 .src_type = TCG_TYPE_PTR, 6004 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6005 }; 6006 const TCGCallArgumentLoc *loc = &info->in[0]; 6007 TCGType type; 6008 unsigned slot; 6009 tcg_target_ulong imm; 6010 6011 /* 6012 * Handle env, which is always first. 6013 */ 6014 ptr_mov.dst = loc->arg_slot; 6015 ptr_mov.src = TCG_AREG0; 6016 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6017 6018 /* 6019 * Handle oi. 6020 */ 6021 imm = ldst->oi; 6022 loc = &info->in[next_arg]; 6023 type = TCG_TYPE_I32; 6024 switch (loc->kind) { 6025 case TCG_CALL_ARG_NORMAL: 6026 break; 6027 case TCG_CALL_ARG_EXTEND_U: 6028 case TCG_CALL_ARG_EXTEND_S: 6029 /* No extension required for MemOpIdx. */ 6030 tcg_debug_assert(imm <= INT32_MAX); 6031 type = TCG_TYPE_REG; 6032 break; 6033 default: 6034 g_assert_not_reached(); 6035 } 6036 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6037 next_arg++; 6038 6039 /* 6040 * Handle ra. 6041 */ 6042 loc = &info->in[next_arg]; 6043 slot = loc->arg_slot; 6044 if (parm->ra_gen) { 6045 int arg_reg = -1; 6046 TCGReg ra_reg; 6047 6048 if (arg_slot_reg_p(slot)) { 6049 arg_reg = tcg_target_call_iarg_regs[slot]; 6050 } 6051 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6052 6053 ptr_mov.dst = slot; 6054 ptr_mov.src = ra_reg; 6055 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6056 } else { 6057 imm = (uintptr_t)ldst->raddr; 6058 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6059 } 6060 } 6061 6062 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6063 const TCGCallArgumentLoc *loc, 6064 TCGType dst_type, TCGType src_type, 6065 TCGReg lo, TCGReg hi) 6066 { 6067 MemOp reg_mo; 6068 6069 if (dst_type <= TCG_TYPE_REG) { 6070 MemOp src_ext; 6071 6072 switch (loc->kind) { 6073 case TCG_CALL_ARG_NORMAL: 6074 src_ext = src_type == TCG_TYPE_I32 ? 
MO_32 : MO_64; 6075 break; 6076 case TCG_CALL_ARG_EXTEND_U: 6077 dst_type = TCG_TYPE_REG; 6078 src_ext = MO_UL; 6079 break; 6080 case TCG_CALL_ARG_EXTEND_S: 6081 dst_type = TCG_TYPE_REG; 6082 src_ext = MO_SL; 6083 break; 6084 default: 6085 g_assert_not_reached(); 6086 } 6087 6088 mov[0].dst = loc->arg_slot; 6089 mov[0].dst_type = dst_type; 6090 mov[0].src = lo; 6091 mov[0].src_type = src_type; 6092 mov[0].src_ext = src_ext; 6093 return 1; 6094 } 6095 6096 if (TCG_TARGET_REG_BITS == 32) { 6097 assert(dst_type == TCG_TYPE_I64); 6098 reg_mo = MO_32; 6099 } else { 6100 assert(dst_type == TCG_TYPE_I128); 6101 reg_mo = MO_64; 6102 } 6103 6104 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6105 mov[0].src = lo; 6106 mov[0].dst_type = TCG_TYPE_REG; 6107 mov[0].src_type = TCG_TYPE_REG; 6108 mov[0].src_ext = reg_mo; 6109 6110 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6111 mov[1].src = hi; 6112 mov[1].dst_type = TCG_TYPE_REG; 6113 mov[1].src_type = TCG_TYPE_REG; 6114 mov[1].src_ext = reg_mo; 6115 6116 return 2; 6117 } 6118 6119 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6120 const TCGLdstHelperParam *parm) 6121 { 6122 const TCGHelperInfo *info; 6123 const TCGCallArgumentLoc *loc; 6124 TCGMovExtend mov[2]; 6125 unsigned next_arg, nmov; 6126 MemOp mop = get_memop(ldst->oi); 6127 6128 switch (mop & MO_SIZE) { 6129 case MO_8: 6130 case MO_16: 6131 case MO_32: 6132 info = &info_helper_ld32_mmu; 6133 break; 6134 case MO_64: 6135 info = &info_helper_ld64_mmu; 6136 break; 6137 case MO_128: 6138 info = &info_helper_ld128_mmu; 6139 break; 6140 default: 6141 g_assert_not_reached(); 6142 } 6143 6144 /* Defer env argument. */ 6145 next_arg = 1; 6146 6147 loc = &info->in[next_arg]; 6148 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6149 /* 6150 * 32-bit host with 32-bit guest: zero-extend the guest address 6151 * to 64-bits for the helper by storing the low part, then 6152 * load a zero for the high part. 6153 */ 6154 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6155 TCG_TYPE_I32, TCG_TYPE_I32, 6156 ldst->addr_reg, -1); 6157 tcg_out_helper_load_slots(s, 1, mov, parm); 6158 6159 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6160 TCG_TYPE_I32, 0, parm); 6161 next_arg += 2; 6162 } else { 6163 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6164 ldst->addr_reg, -1); 6165 tcg_out_helper_load_slots(s, nmov, mov, parm); 6166 next_arg += nmov; 6167 } 6168 6169 switch (info->out_kind) { 6170 case TCG_CALL_RET_NORMAL: 6171 case TCG_CALL_RET_BY_VEC: 6172 break; 6173 case TCG_CALL_RET_BY_REF: 6174 /* 6175 * The return reference is in the first argument slot. 6176 * We need memory in which to return: re-use the top of stack. 
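 * The address passed to the helper is TCG_REG_CALL_STACK +
 * TCG_TARGET_CALL_STACK_OFFSET; tcg_out_ld_helper_ret reloads the
 * 128-bit result from this same offset.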
6177 */ 6178 { 6179 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6180 6181 if (arg_slot_reg_p(0)) { 6182 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6183 TCG_REG_CALL_STACK, ofs_slot0); 6184 } else { 6185 tcg_debug_assert(parm->ntmp != 0); 6186 tcg_out_addi_ptr(s, parm->tmp[0], 6187 TCG_REG_CALL_STACK, ofs_slot0); 6188 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6189 TCG_REG_CALL_STACK, ofs_slot0); 6190 } 6191 } 6192 break; 6193 default: 6194 g_assert_not_reached(); 6195 } 6196 6197 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6198 } 6199 6200 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6201 bool load_sign, 6202 const TCGLdstHelperParam *parm) 6203 { 6204 MemOp mop = get_memop(ldst->oi); 6205 TCGMovExtend mov[2]; 6206 int ofs_slot0; 6207 6208 switch (ldst->type) { 6209 case TCG_TYPE_I64: 6210 if (TCG_TARGET_REG_BITS == 32) { 6211 break; 6212 } 6213 /* fall through */ 6214 6215 case TCG_TYPE_I32: 6216 mov[0].dst = ldst->datalo_reg; 6217 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6218 mov[0].dst_type = ldst->type; 6219 mov[0].src_type = TCG_TYPE_REG; 6220 6221 /* 6222 * If load_sign, then we allowed the helper to perform the 6223 * appropriate sign extension to tcg_target_ulong, and all 6224 * we need now is a plain move. 6225 * 6226 * If they do not, then we expect the relevant extension 6227 * instruction to be no more expensive than a move, and 6228 * we thus save the icache etc by only using one of two 6229 * helper functions. 6230 */ 6231 if (load_sign || !(mop & MO_SIGN)) { 6232 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6233 mov[0].src_ext = MO_32; 6234 } else { 6235 mov[0].src_ext = MO_64; 6236 } 6237 } else { 6238 mov[0].src_ext = mop & MO_SSIZE; 6239 } 6240 tcg_out_movext1(s, mov); 6241 return; 6242 6243 case TCG_TYPE_I128: 6244 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6245 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6246 switch (TCG_TARGET_CALL_RET_I128) { 6247 case TCG_CALL_RET_NORMAL: 6248 break; 6249 case TCG_CALL_RET_BY_VEC: 6250 tcg_out_st(s, TCG_TYPE_V128, 6251 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6252 TCG_REG_CALL_STACK, ofs_slot0); 6253 /* fall through */ 6254 case TCG_CALL_RET_BY_REF: 6255 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6256 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6257 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6258 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6259 return; 6260 default: 6261 g_assert_not_reached(); 6262 } 6263 break; 6264 6265 default: 6266 g_assert_not_reached(); 6267 } 6268 6269 mov[0].dst = ldst->datalo_reg; 6270 mov[0].src = 6271 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6272 mov[0].dst_type = TCG_TYPE_REG; 6273 mov[0].src_type = TCG_TYPE_REG; 6274 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6275 6276 mov[1].dst = ldst->datahi_reg; 6277 mov[1].src = 6278 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6279 mov[1].dst_type = TCG_TYPE_REG; 6280 mov[1].src_type = TCG_TYPE_REG; 6281 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6282 6283 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6284 } 6285 6286 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6287 const TCGLdstHelperParam *parm) 6288 { 6289 const TCGHelperInfo *info; 6290 const TCGCallArgumentLoc *loc; 6291 TCGMovExtend mov[4]; 6292 TCGType data_type; 6293 unsigned next_arg, nmov, n; 6294 MemOp mop = get_memop(ldst->oi); 6295 6296 switch (mop & MO_SIZE) { 6297 case MO_8: 6298 case MO_16: 6299 case MO_32: 6300 info = &info_helper_st32_mmu; 6301 data_type = TCG_TYPE_I32; 6302 break; 6303 case MO_64: 6304 info = &info_helper_st64_mmu; 6305 data_type = TCG_TYPE_I64; 6306 break; 6307 case MO_128: 6308 info = &info_helper_st128_mmu; 6309 data_type = TCG_TYPE_I128; 6310 break; 6311 default: 6312 g_assert_not_reached(); 6313 } 6314 6315 /* Defer env argument. */ 6316 next_arg = 1; 6317 nmov = 0; 6318 6319 /* Handle addr argument. */ 6320 loc = &info->in[next_arg]; 6321 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6322 if (TCG_TARGET_REG_BITS == 32) { 6323 /* 6324 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6325 * to 64-bits for the helper by storing the low part. Later, 6326 * after we have processed the register inputs, we will load a 6327 * zero for the high part. 6328 */ 6329 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6330 TCG_TYPE_I32, TCG_TYPE_I32, 6331 ldst->addr_reg, -1); 6332 next_arg += 2; 6333 nmov += 1; 6334 } else { 6335 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6336 ldst->addr_reg, -1); 6337 next_arg += n; 6338 nmov += n; 6339 } 6340 6341 /* Handle data argument. */ 6342 loc = &info->in[next_arg]; 6343 switch (loc->kind) { 6344 case TCG_CALL_ARG_NORMAL: 6345 case TCG_CALL_ARG_EXTEND_U: 6346 case TCG_CALL_ARG_EXTEND_S: 6347 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6348 ldst->datalo_reg, ldst->datahi_reg); 6349 next_arg += n; 6350 nmov += n; 6351 tcg_out_helper_load_slots(s, nmov, mov, parm); 6352 break; 6353 6354 case TCG_CALL_ARG_BY_REF: 6355 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6356 tcg_debug_assert(data_type == TCG_TYPE_I128); 6357 tcg_out_st(s, TCG_TYPE_I64, 6358 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6359 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6360 tcg_out_st(s, TCG_TYPE_I64, 6361 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6362 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6363 6364 tcg_out_helper_load_slots(s, nmov, mov, parm); 6365 6366 if (arg_slot_reg_p(loc->arg_slot)) { 6367 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6368 TCG_REG_CALL_STACK, 6369 arg_slot_stk_ofs(loc->ref_slot)); 6370 } else { 6371 tcg_debug_assert(parm->ntmp != 0); 6372 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6373 arg_slot_stk_ofs(loc->ref_slot)); 6374 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6375 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6376 } 6377 next_arg += 2; 6378 break; 6379 6380 default: 6381 g_assert_not_reached(); 6382 } 6383 6384 if (TCG_TARGET_REG_BITS == 32) { 6385 /* Zero extend the address by loading a zero for the high part. 
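   Of the two halves of the widened address argument, in[1] and in[2],
   the entry indexed by 1 + !HOST_BIG_ENDIAN is the one that receives
   the zero high word.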
*/ 6386 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6387 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6388 } 6389 6390 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6391 } 6392 6393 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6394 { 6395 int i, start_words, num_insns; 6396 TCGOp *op; 6397 6398 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6399 && qemu_log_in_addr_range(pc_start))) { 6400 FILE *logfile = qemu_log_trylock(); 6401 if (logfile) { 6402 fprintf(logfile, "OP:\n"); 6403 tcg_dump_ops(s, logfile, false); 6404 fprintf(logfile, "\n"); 6405 qemu_log_unlock(logfile); 6406 } 6407 } 6408 6409 #ifdef CONFIG_DEBUG_TCG 6410 /* Ensure all labels referenced have been emitted. */ 6411 { 6412 TCGLabel *l; 6413 bool error = false; 6414 6415 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6416 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6417 qemu_log_mask(CPU_LOG_TB_OP, 6418 "$L%d referenced but not present.\n", l->id); 6419 error = true; 6420 } 6421 } 6422 assert(!error); 6423 } 6424 #endif 6425 6426 /* Do not reuse any EBB that may be allocated within the TB. */ 6427 tcg_temp_ebb_reset_freed(s); 6428 6429 tcg_optimize(s); 6430 6431 reachable_code_pass(s); 6432 liveness_pass_0(s); 6433 liveness_pass_1(s); 6434 6435 if (s->nb_indirects > 0) { 6436 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6437 && qemu_log_in_addr_range(pc_start))) { 6438 FILE *logfile = qemu_log_trylock(); 6439 if (logfile) { 6440 fprintf(logfile, "OP before indirect lowering:\n"); 6441 tcg_dump_ops(s, logfile, false); 6442 fprintf(logfile, "\n"); 6443 qemu_log_unlock(logfile); 6444 } 6445 } 6446 6447 /* Replace indirect temps with direct temps. */ 6448 if (liveness_pass_2(s)) { 6449 /* If changes were made, re-run liveness. */ 6450 liveness_pass_1(s); 6451 } 6452 } 6453 6454 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6455 && qemu_log_in_addr_range(pc_start))) { 6456 FILE *logfile = qemu_log_trylock(); 6457 if (logfile) { 6458 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6459 tcg_dump_ops(s, logfile, true); 6460 fprintf(logfile, "\n"); 6461 qemu_log_unlock(logfile); 6462 } 6463 } 6464 6465 /* Initialize goto_tb jump offsets. */ 6466 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6467 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6468 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6469 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6470 6471 tcg_reg_alloc_start(s); 6472 6473 /* 6474 * Reset the buffer pointers when restarting after overflow. 6475 * TODO: Move this into translate-all.c with the rest of the 6476 * buffer management. Having only this done here is confusing. 6477 */ 6478 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6479 s->code_ptr = s->code_buf; 6480 s->data_gen_ptr = NULL; 6481 6482 QSIMPLEQ_INIT(&s->ldst_labels); 6483 s->pool_labels = NULL; 6484 6485 start_words = s->insn_start_words; 6486 s->gen_insn_data = 6487 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6488 6489 tcg_out_tb_start(s); 6490 6491 num_insns = -1; 6492 QTAILQ_FOREACH(op, &s->ops, link) { 6493 TCGOpcode opc = op->opc; 6494 6495 switch (opc) { 6496 case INDEX_op_mov: 6497 case INDEX_op_mov_vec: 6498 tcg_reg_alloc_mov(s, op); 6499 break; 6500 case INDEX_op_dup_vec: 6501 tcg_reg_alloc_dup(s, op); 6502 break; 6503 case INDEX_op_insn_start: 6504 if (num_insns >= 0) { 6505 size_t off = tcg_current_code_size(s); 6506 s->gen_insn_end_off[num_insns] = off; 6507 /* Assert that we do not overflow our stored offset. 
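   The elements of gen_insn_end_off[] are only 16 bits wide; the
   UINT16_MAX test at the bottom of this loop keeps the TB small
   enough for the stored value to fit.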
*/ 6508 assert(s->gen_insn_end_off[num_insns] == off); 6509 } 6510 num_insns++; 6511 for (i = 0; i < start_words; ++i) { 6512 s->gen_insn_data[num_insns * start_words + i] = 6513 tcg_get_insn_start_param(op, i); 6514 } 6515 break; 6516 case INDEX_op_discard: 6517 temp_dead(s, arg_temp(op->args[0])); 6518 break; 6519 case INDEX_op_set_label: 6520 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6521 tcg_out_label(s, arg_label(op->args[0])); 6522 break; 6523 case INDEX_op_call: 6524 tcg_reg_alloc_call(s, op); 6525 break; 6526 case INDEX_op_exit_tb: 6527 tcg_out_exit_tb(s, op->args[0]); 6528 break; 6529 case INDEX_op_goto_tb: 6530 tcg_out_goto_tb(s, op->args[0]); 6531 break; 6532 case INDEX_op_dup2_vec: 6533 if (tcg_reg_alloc_dup2(s, op)) { 6534 break; 6535 } 6536 /* fall through */ 6537 default: 6538 /* Sanity check that we've not introduced any unhandled opcodes. */ 6539 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6540 TCGOP_FLAGS(op))); 6541 /* Note: in order to speed up the code, it would be much 6542 faster to have specialized register allocator functions for 6543 some common argument patterns */ 6544 tcg_reg_alloc_op(s, op); 6545 break; 6546 } 6547 /* Test for (pending) buffer overflow. The assumption is that any 6548 one operation beginning below the high water mark cannot overrun 6549 the buffer completely. Thus we can test for overflow after 6550 generating code without having to check during generation. */ 6551 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6552 return -1; 6553 } 6554 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6555 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6556 return -2; 6557 } 6558 } 6559 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6560 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6561 6562 /* Generate TB finalization at the end of block */ 6563 i = tcg_out_ldst_finalize(s); 6564 if (i < 0) { 6565 return i; 6566 } 6567 i = tcg_out_pool_finalize(s); 6568 if (i < 0) { 6569 return i; 6570 } 6571 if (!tcg_resolve_relocs(s)) { 6572 return -2; 6573 } 6574 6575 #ifndef CONFIG_TCG_INTERPRETER 6576 /* flush instruction cache */ 6577 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6578 (uintptr_t)s->code_buf, 6579 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6580 #endif 6581 6582 return tcg_current_code_size(s); 6583 } 6584 6585 #ifdef ELF_HOST_MACHINE 6586 /* In order to use this feature, the backend needs to do three things: 6587 6588 (1) Define ELF_HOST_MACHINE to indicate both what value to 6589 put into the ELF image and to indicate support for the feature. 6590 6591 (2) Define tcg_register_jit. This should create a buffer containing 6592 the contents of a .debug_frame section that describes the post- 6593 prologue unwind info for the tcg machine. 6594 6595 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6596 */ 6597 6598 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. 
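   This is the GDB JIT compilation interface: GDB sets a breakpoint
   in __jit_debug_register_code and, each time it is hit, reads the
   registered in-memory ELF image through __jit_debug_descriptor.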
*/ 6599 typedef enum { 6600 JIT_NOACTION = 0, 6601 JIT_REGISTER_FN, 6602 JIT_UNREGISTER_FN 6603 } jit_actions_t; 6604 6605 struct jit_code_entry { 6606 struct jit_code_entry *next_entry; 6607 struct jit_code_entry *prev_entry; 6608 const void *symfile_addr; 6609 uint64_t symfile_size; 6610 }; 6611 6612 struct jit_descriptor { 6613 uint32_t version; 6614 uint32_t action_flag; 6615 struct jit_code_entry *relevant_entry; 6616 struct jit_code_entry *first_entry; 6617 }; 6618 6619 void __jit_debug_register_code(void) __attribute__((noinline)); 6620 void __jit_debug_register_code(void) 6621 { 6622 asm(""); 6623 } 6624 6625 /* Must statically initialize the version, because GDB may check 6626 the version before we can set it. */ 6627 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6628 6629 /* End GDB interface. */ 6630 6631 static int find_string(const char *strtab, const char *str) 6632 { 6633 const char *p = strtab + 1; 6634 6635 while (1) { 6636 if (strcmp(p, str) == 0) { 6637 return p - strtab; 6638 } 6639 p += strlen(p) + 1; 6640 } 6641 } 6642 6643 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6644 const void *debug_frame, 6645 size_t debug_frame_size) 6646 { 6647 struct __attribute__((packed)) DebugInfo { 6648 uint32_t len; 6649 uint16_t version; 6650 uint32_t abbrev; 6651 uint8_t ptr_size; 6652 uint8_t cu_die; 6653 uint16_t cu_lang; 6654 uintptr_t cu_low_pc; 6655 uintptr_t cu_high_pc; 6656 uint8_t fn_die; 6657 char fn_name[16]; 6658 uintptr_t fn_low_pc; 6659 uintptr_t fn_high_pc; 6660 uint8_t cu_eoc; 6661 }; 6662 6663 struct ElfImage { 6664 ElfW(Ehdr) ehdr; 6665 ElfW(Phdr) phdr; 6666 ElfW(Shdr) shdr[7]; 6667 ElfW(Sym) sym[2]; 6668 struct DebugInfo di; 6669 uint8_t da[24]; 6670 char str[80]; 6671 }; 6672 6673 struct ElfImage *img; 6674 6675 static const struct ElfImage img_template = { 6676 .ehdr = { 6677 .e_ident[EI_MAG0] = ELFMAG0, 6678 .e_ident[EI_MAG1] = ELFMAG1, 6679 .e_ident[EI_MAG2] = ELFMAG2, 6680 .e_ident[EI_MAG3] = ELFMAG3, 6681 .e_ident[EI_CLASS] = ELF_CLASS, 6682 .e_ident[EI_DATA] = ELF_DATA, 6683 .e_ident[EI_VERSION] = EV_CURRENT, 6684 .e_type = ET_EXEC, 6685 .e_machine = ELF_HOST_MACHINE, 6686 .e_version = EV_CURRENT, 6687 .e_phoff = offsetof(struct ElfImage, phdr), 6688 .e_shoff = offsetof(struct ElfImage, shdr), 6689 .e_ehsize = sizeof(ElfW(Shdr)), 6690 .e_phentsize = sizeof(ElfW(Phdr)), 6691 .e_phnum = 1, 6692 .e_shentsize = sizeof(ElfW(Shdr)), 6693 .e_shnum = ARRAY_SIZE(img->shdr), 6694 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6695 #ifdef ELF_HOST_FLAGS 6696 .e_flags = ELF_HOST_FLAGS, 6697 #endif 6698 #ifdef ELF_OSABI 6699 .e_ident[EI_OSABI] = ELF_OSABI, 6700 #endif 6701 }, 6702 .phdr = { 6703 .p_type = PT_LOAD, 6704 .p_flags = PF_X, 6705 }, 6706 .shdr = { 6707 [0] = { .sh_type = SHT_NULL }, 6708 /* Trick: The contents of code_gen_buffer are not present in 6709 this fake ELF file; that got allocated elsewhere. Therefore 6710 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6711 will not look for contents. We can record any address. 
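   The real address and size of the buffer are filled into sh_addr
   and sh_size by tcg_register_jit_int below.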
*/ 6712 [1] = { /* .text */ 6713 .sh_type = SHT_NOBITS, 6714 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6715 }, 6716 [2] = { /* .debug_info */ 6717 .sh_type = SHT_PROGBITS, 6718 .sh_offset = offsetof(struct ElfImage, di), 6719 .sh_size = sizeof(struct DebugInfo), 6720 }, 6721 [3] = { /* .debug_abbrev */ 6722 .sh_type = SHT_PROGBITS, 6723 .sh_offset = offsetof(struct ElfImage, da), 6724 .sh_size = sizeof(img->da), 6725 }, 6726 [4] = { /* .debug_frame */ 6727 .sh_type = SHT_PROGBITS, 6728 .sh_offset = sizeof(struct ElfImage), 6729 }, 6730 [5] = { /* .symtab */ 6731 .sh_type = SHT_SYMTAB, 6732 .sh_offset = offsetof(struct ElfImage, sym), 6733 .sh_size = sizeof(img->sym), 6734 .sh_info = 1, 6735 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6736 .sh_entsize = sizeof(ElfW(Sym)), 6737 }, 6738 [6] = { /* .strtab */ 6739 .sh_type = SHT_STRTAB, 6740 .sh_offset = offsetof(struct ElfImage, str), 6741 .sh_size = sizeof(img->str), 6742 } 6743 }, 6744 .sym = { 6745 [1] = { /* code_gen_buffer */ 6746 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6747 .st_shndx = 1, 6748 } 6749 }, 6750 .di = { 6751 .len = sizeof(struct DebugInfo) - 4, 6752 .version = 2, 6753 .ptr_size = sizeof(void *), 6754 .cu_die = 1, 6755 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6756 .fn_die = 2, 6757 .fn_name = "code_gen_buffer" 6758 }, 6759 .da = { 6760 1, /* abbrev number (the cu) */ 6761 0x11, 1, /* DW_TAG_compile_unit, has children */ 6762 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6763 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6764 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6765 0, 0, /* end of abbrev */ 6766 2, /* abbrev number (the fn) */ 6767 0x2e, 0, /* DW_TAG_subprogram, no children */ 6768 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6769 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6770 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6771 0, 0, /* end of abbrev */ 6772 0 /* no more abbrev */ 6773 }, 6774 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6775 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 6776 }; 6777 6778 /* We only need a single jit entry; statically allocate it. */ 6779 static struct jit_code_entry one_entry; 6780 6781 uintptr_t buf = (uintptr_t)buf_ptr; 6782 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6783 DebugFrameHeader *dfh; 6784 6785 img = g_malloc(img_size); 6786 *img = img_template; 6787 6788 img->phdr.p_vaddr = buf; 6789 img->phdr.p_paddr = buf; 6790 img->phdr.p_memsz = buf_size; 6791 6792 img->shdr[1].sh_name = find_string(img->str, ".text"); 6793 img->shdr[1].sh_addr = buf; 6794 img->shdr[1].sh_size = buf_size; 6795 6796 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6797 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6798 6799 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6800 img->shdr[4].sh_size = debug_frame_size; 6801 6802 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6803 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6804 6805 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6806 img->sym[1].st_value = buf; 6807 img->sym[1].st_size = buf_size; 6808 6809 img->di.cu_low_pc = buf; 6810 img->di.cu_high_pc = buf + buf_size; 6811 img->di.fn_low_pc = buf; 6812 img->di.fn_high_pc = buf + buf_size; 6813 6814 dfh = (DebugFrameHeader *)(img + 1); 6815 memcpy(dfh, debug_frame, debug_frame_size); 6816 dfh->fde.func_start = buf; 6817 dfh->fde.func_len = buf_size; 6818 6819 #ifdef DEBUG_JIT 6820 /* Enable this block to be able to debug the ELF image file creation. 
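   The image is written to qemu.jit under g_get_tmp_dir().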
6821 One can use readelf, objdump, or other inspection utilities. */ 6822 { 6823 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6824 FILE *f = fopen(jit, "w+b"); 6825 if (f) { 6826 if (fwrite(img, img_size, 1, f) != img_size) { 6827 /* Avoid stupid unused return value warning for fwrite. */ 6828 } 6829 fclose(f); 6830 } 6831 } 6832 #endif 6833 6834 one_entry.symfile_addr = img; 6835 one_entry.symfile_size = img_size; 6836 6837 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6838 __jit_debug_descriptor.relevant_entry = &one_entry; 6839 __jit_debug_descriptor.first_entry = &one_entry; 6840 __jit_debug_register_code(); 6841 } 6842 #else 6843 /* No support for the feature. Provide the entry point expected by exec.c, 6844 and implement the internal function we declared earlier. */ 6845 6846 static void tcg_register_jit_int(const void *buf, size_t size, 6847 const void *debug_frame, 6848 size_t debug_frame_size) 6849 { 6850 } 6851 6852 void tcg_register_jit(const void *buf, size_t buf_size) 6853 { 6854 } 6855 #endif /* ELF_HOST_MACHINE */ 6856 6857 #if !TCG_TARGET_MAYBE_vec 6858 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6859 { 6860 g_assert_not_reached(); 6861 } 6862 #endif 6863