/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
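
/*
 * Illustrative flow, summarising the functions above (no new API): when a
 * backend emits a branch to a label that has not been placed yet, it calls
 * tcg_out_reloc() to queue a TCGRelocation on that label.  Once the label is
 * finally emitted, tcg_out_label() records its code pointer, and
 * tcg_resolve_relocs() later patches every queued site through the backend's
 * patch_reloc().
 */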
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
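
/*
 * For example (illustration only): a backend that wants a 16-bit value
 * sign-extended into a 64-bit register can use
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I64, MO_SW, src);
 * which simply dispatches to tcg_out_ext16s() above.
 */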
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
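
/*
 * Typical use (illustration only): a backend that cannot encode a large
 * constant inline emits a pc-relative load with a placeholder displacement,
 * records it with new_pool_label(s, value, <backend reloc type>, s->code_ptr,
 * addend), and relies on tcg_out_pool_finalize() below to place the data
 * after the generated code and patch the displacement via patch_reloc().
 */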
/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
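
/*
 * For illustration only: an entry such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h therefore produces both the enumerator
 *     c_o1_i2_r_r_ri
 * in TCGConstraintSetIndex above and the element
 *     { 1, 2, { "r", "r", "ri" } }
 * in constraint_sets[], so each enumerator indexes its own constraint set.
 */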
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic: Use @dynamic_constraint to select a constraint set
 *       based on any of @type, @flags, or host isa.
 *   Otherwise: The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
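
/*
 * Illustration only: for an entry like
 *     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 * the _Generic selection has a single association, so it compiles only if
 * the backend really defined outop_add with type TCGOutOpBinary; any other
 * type has no matching association and produces a compile-time error.
 */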
/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */
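
/*
 * A note on the typemask layout used below: each dh_typemask() term packs
 * one 3-bit dh_typecode_* value, slot 0 describing the return value and
 * slot N describing argument N (the code further down relies on exactly
 * this: "typemask & 7" for the return type and
 * "extract32(typemask, (j + 1) * 3, 3)" for argument j).  For example,
 * info_helper_ld64_mmu's mask encodes: i64 return, env, i64 addr, i32 oi,
 * ptr ra.
 */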
1178 */ 1179 1180 #if TCG_TARGET_REG_BITS == 32 1181 # define dh_typecode_ttl dh_typecode_i32 1182 #else 1183 # define dh_typecode_ttl dh_typecode_i64 1184 #endif 1185 1186 static TCGHelperInfo info_helper_ld32_mmu = { 1187 .flags = TCG_CALL_NO_WG, 1188 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1189 | dh_typemask(env, 1) 1190 | dh_typemask(i64, 2) /* uint64_t addr */ 1191 | dh_typemask(i32, 3) /* unsigned oi */ 1192 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1193 }; 1194 1195 static TCGHelperInfo info_helper_ld64_mmu = { 1196 .flags = TCG_CALL_NO_WG, 1197 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1198 | dh_typemask(env, 1) 1199 | dh_typemask(i64, 2) /* uint64_t addr */ 1200 | dh_typemask(i32, 3) /* unsigned oi */ 1201 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1202 }; 1203 1204 static TCGHelperInfo info_helper_ld128_mmu = { 1205 .flags = TCG_CALL_NO_WG, 1206 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1207 | dh_typemask(env, 1) 1208 | dh_typemask(i64, 2) /* uint64_t addr */ 1209 | dh_typemask(i32, 3) /* unsigned oi */ 1210 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1211 }; 1212 1213 static TCGHelperInfo info_helper_st32_mmu = { 1214 .flags = TCG_CALL_NO_WG, 1215 .typemask = dh_typemask(void, 0) 1216 | dh_typemask(env, 1) 1217 | dh_typemask(i64, 2) /* uint64_t addr */ 1218 | dh_typemask(i32, 3) /* uint32_t data */ 1219 | dh_typemask(i32, 4) /* unsigned oi */ 1220 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1221 }; 1222 1223 static TCGHelperInfo info_helper_st64_mmu = { 1224 .flags = TCG_CALL_NO_WG, 1225 .typemask = dh_typemask(void, 0) 1226 | dh_typemask(env, 1) 1227 | dh_typemask(i64, 2) /* uint64_t addr */ 1228 | dh_typemask(i64, 3) /* uint64_t data */ 1229 | dh_typemask(i32, 4) /* unsigned oi */ 1230 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1231 }; 1232 1233 static TCGHelperInfo info_helper_st128_mmu = { 1234 .flags = TCG_CALL_NO_WG, 1235 .typemask = dh_typemask(void, 0) 1236 | dh_typemask(env, 1) 1237 | dh_typemask(i64, 2) /* uint64_t addr */ 1238 | dh_typemask(i128, 3) /* Int128 data */ 1239 | dh_typemask(i32, 4) /* unsigned oi */ 1240 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1241 }; 1242 1243 #ifdef CONFIG_TCG_INTERPRETER 1244 static ffi_type *typecode_to_ffi(int argmask) 1245 { 1246 /* 1247 * libffi does not support __int128_t, so we have forced Int128 1248 * to use the structure definition instead of the builtin type. 1249 */ 1250 static ffi_type *ffi_type_i128_elements[3] = { 1251 &ffi_type_uint64, 1252 &ffi_type_uint64, 1253 NULL 1254 }; 1255 static ffi_type ffi_type_i128 = { 1256 .size = 16, 1257 .alignment = __alignof__(Int128), 1258 .type = FFI_TYPE_STRUCT, 1259 .elements = ffi_type_i128_elements, 1260 }; 1261 1262 switch (argmask) { 1263 case dh_typecode_void: 1264 return &ffi_type_void; 1265 case dh_typecode_i32: 1266 return &ffi_type_uint32; 1267 case dh_typecode_s32: 1268 return &ffi_type_sint32; 1269 case dh_typecode_i64: 1270 return &ffi_type_uint64; 1271 case dh_typecode_s64: 1272 return &ffi_type_sint64; 1273 case dh_typecode_ptr: 1274 return &ffi_type_pointer; 1275 case dh_typecode_i128: 1276 return &ffi_type_i128; 1277 } 1278 g_assert_not_reached(); 1279 } 1280 1281 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1282 { 1283 unsigned typemask = info->typemask; 1284 struct { 1285 ffi_cif cif; 1286 ffi_type *args[]; 1287 } *ca; 1288 ffi_status status; 1289 int nargs; 1290 1291 /* Ignoring the return type, find the last non-zero field. 
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
1404 */ 1405 for (int i = 1; i < n; ++i) { 1406 loc[i] = (TCGCallArgumentLoc){ 1407 .kind = TCG_CALL_ARG_BY_REF_N, 1408 .arg_idx = cum->arg_idx, 1409 .tmp_subindex = i, 1410 .ref_slot = cum->ref_slot + i, 1411 }; 1412 } 1413 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1414 cum->ref_slot += n; 1415 } 1416 1417 static void init_call_layout(TCGHelperInfo *info) 1418 { 1419 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1420 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1421 unsigned typemask = info->typemask; 1422 unsigned typecode; 1423 TCGCumulativeArgs cum = { }; 1424 1425 /* 1426 * Parse and place any function return value. 1427 */ 1428 typecode = typemask & 7; 1429 switch (typecode) { 1430 case dh_typecode_void: 1431 info->nr_out = 0; 1432 break; 1433 case dh_typecode_i32: 1434 case dh_typecode_s32: 1435 case dh_typecode_ptr: 1436 info->nr_out = 1; 1437 info->out_kind = TCG_CALL_RET_NORMAL; 1438 break; 1439 case dh_typecode_i64: 1440 case dh_typecode_s64: 1441 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1442 info->out_kind = TCG_CALL_RET_NORMAL; 1443 /* Query the last register now to trigger any assert early. */ 1444 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1445 break; 1446 case dh_typecode_i128: 1447 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1448 info->out_kind = TCG_TARGET_CALL_RET_I128; 1449 switch (TCG_TARGET_CALL_RET_I128) { 1450 case TCG_CALL_RET_NORMAL: 1451 /* Query the last register now to trigger any assert early. */ 1452 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1453 break; 1454 case TCG_CALL_RET_BY_VEC: 1455 /* Query the single register now to trigger any assert early. */ 1456 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1457 break; 1458 case TCG_CALL_RET_BY_REF: 1459 /* 1460 * Allocate the first argument to the output. 1461 * We don't need to store this anywhere, just make it 1462 * unavailable for use in the input loop below. 1463 */ 1464 cum.arg_slot = 1; 1465 break; 1466 default: 1467 qemu_build_not_reached(); 1468 } 1469 break; 1470 default: 1471 g_assert_not_reached(); 1472 } 1473 1474 /* 1475 * Parse and place function arguments. 
1476 */ 1477 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1478 TCGCallArgumentKind kind; 1479 TCGType type; 1480 1481 typecode = typemask & 7; 1482 switch (typecode) { 1483 case dh_typecode_i32: 1484 case dh_typecode_s32: 1485 type = TCG_TYPE_I32; 1486 break; 1487 case dh_typecode_i64: 1488 case dh_typecode_s64: 1489 type = TCG_TYPE_I64; 1490 break; 1491 case dh_typecode_ptr: 1492 type = TCG_TYPE_PTR; 1493 break; 1494 case dh_typecode_i128: 1495 type = TCG_TYPE_I128; 1496 break; 1497 default: 1498 g_assert_not_reached(); 1499 } 1500 1501 switch (type) { 1502 case TCG_TYPE_I32: 1503 switch (TCG_TARGET_CALL_ARG_I32) { 1504 case TCG_CALL_ARG_EVEN: 1505 layout_arg_even(&cum); 1506 /* fall through */ 1507 case TCG_CALL_ARG_NORMAL: 1508 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1509 break; 1510 case TCG_CALL_ARG_EXTEND: 1511 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1512 layout_arg_1(&cum, info, kind); 1513 break; 1514 default: 1515 qemu_build_not_reached(); 1516 } 1517 break; 1518 1519 case TCG_TYPE_I64: 1520 switch (TCG_TARGET_CALL_ARG_I64) { 1521 case TCG_CALL_ARG_EVEN: 1522 layout_arg_even(&cum); 1523 /* fall through */ 1524 case TCG_CALL_ARG_NORMAL: 1525 if (TCG_TARGET_REG_BITS == 32) { 1526 layout_arg_normal_n(&cum, info, 2); 1527 } else { 1528 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1529 } 1530 break; 1531 default: 1532 qemu_build_not_reached(); 1533 } 1534 break; 1535 1536 case TCG_TYPE_I128: 1537 switch (TCG_TARGET_CALL_ARG_I128) { 1538 case TCG_CALL_ARG_EVEN: 1539 layout_arg_even(&cum); 1540 /* fall through */ 1541 case TCG_CALL_ARG_NORMAL: 1542 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1543 break; 1544 case TCG_CALL_ARG_BY_REF: 1545 layout_arg_by_ref(&cum, info); 1546 break; 1547 default: 1548 qemu_build_not_reached(); 1549 } 1550 break; 1551 1552 default: 1553 g_assert_not_reached(); 1554 } 1555 } 1556 info->nr_in = cum.info_in_idx; 1557 1558 /* Validate that we didn't overrun the input array. */ 1559 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1560 /* Validate the backend has enough argument space. */ 1561 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1562 1563 /* 1564 * Relocate the "ref_slot" area to the end of the parameters. 1565 * Minimizing this stack offset helps code size for x86, 1566 * which has a signed 8-bit offset encoding. 
1567 */ 1568 if (cum.ref_slot != 0) { 1569 int ref_base = 0; 1570 1571 if (cum.arg_slot > max_reg_slots) { 1572 int align = __alignof(Int128) / sizeof(tcg_target_long); 1573 1574 ref_base = cum.arg_slot - max_reg_slots; 1575 if (align > 1) { 1576 ref_base = ROUND_UP(ref_base, align); 1577 } 1578 } 1579 assert(ref_base + cum.ref_slot <= max_stk_slots); 1580 ref_base += max_reg_slots; 1581 1582 if (ref_base != 0) { 1583 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1584 TCGCallArgumentLoc *loc = &info->in[i]; 1585 switch (loc->kind) { 1586 case TCG_CALL_ARG_BY_REF: 1587 case TCG_CALL_ARG_BY_REF_N: 1588 loc->ref_slot += ref_base; 1589 break; 1590 default: 1591 break; 1592 } 1593 } 1594 } 1595 } 1596 } 1597 1598 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1599 static void process_constraint_sets(void); 1600 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1601 TCGReg reg, const char *name); 1602 1603 static void tcg_context_init(unsigned max_threads) 1604 { 1605 TCGContext *s = &tcg_init_ctx; 1606 int n, i; 1607 TCGTemp *ts; 1608 1609 memset(s, 0, sizeof(*s)); 1610 s->nb_globals = 0; 1611 1612 init_call_layout(&info_helper_ld32_mmu); 1613 init_call_layout(&info_helper_ld64_mmu); 1614 init_call_layout(&info_helper_ld128_mmu); 1615 init_call_layout(&info_helper_st32_mmu); 1616 init_call_layout(&info_helper_st64_mmu); 1617 init_call_layout(&info_helper_st128_mmu); 1618 1619 tcg_target_init(s); 1620 process_constraint_sets(); 1621 1622 /* Reverse the order of the saved registers, assuming they're all at 1623 the start of tcg_target_reg_alloc_order. */ 1624 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1625 int r = tcg_target_reg_alloc_order[n]; 1626 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1627 break; 1628 } 1629 } 1630 for (i = 0; i < n; ++i) { 1631 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1632 } 1633 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1634 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1635 } 1636 1637 tcg_ctx = s; 1638 /* 1639 * In user-mode we simply share the init context among threads, since we 1640 * use a single region. See the documentation tcg_region_init() for the 1641 * reasoning behind this. 1642 * In system-mode we will have at most max_threads TCG threads. 1643 */ 1644 #ifdef CONFIG_USER_ONLY 1645 tcg_ctxs = &tcg_ctx; 1646 tcg_cur_ctxs = 1; 1647 tcg_max_ctxs = 1; 1648 #else 1649 tcg_max_ctxs = max_threads; 1650 tcg_ctxs = g_new0(TCGContext *, max_threads); 1651 #endif 1652 1653 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1654 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1655 tcg_env = temp_tcgv_ptr(ts); 1656 } 1657 1658 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1659 { 1660 tcg_context_init(max_threads); 1661 tcg_region_init(tb_size, splitwx, max_threads); 1662 } 1663 1664 /* 1665 * Allocate TBs right before their corresponding translated code, making 1666 * sure that TBs and code are on different cache lines. 
1667 */ 1668 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1669 { 1670 uintptr_t align = qemu_icache_linesize; 1671 TranslationBlock *tb; 1672 void *next; 1673 1674 retry: 1675 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1676 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1677 1678 if (unlikely(next > s->code_gen_highwater)) { 1679 if (tcg_region_alloc(s)) { 1680 return NULL; 1681 } 1682 goto retry; 1683 } 1684 qatomic_set(&s->code_gen_ptr, next); 1685 return tb; 1686 } 1687 1688 void tcg_prologue_init(void) 1689 { 1690 TCGContext *s = tcg_ctx; 1691 size_t prologue_size; 1692 1693 s->code_ptr = s->code_gen_ptr; 1694 s->code_buf = s->code_gen_ptr; 1695 s->data_gen_ptr = NULL; 1696 1697 #ifndef CONFIG_TCG_INTERPRETER 1698 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1699 #endif 1700 1701 s->pool_labels = NULL; 1702 1703 qemu_thread_jit_write(); 1704 /* Generate the prologue. */ 1705 tcg_target_qemu_prologue(s); 1706 1707 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1708 { 1709 int result = tcg_out_pool_finalize(s); 1710 tcg_debug_assert(result == 0); 1711 } 1712 1713 prologue_size = tcg_current_code_size(s); 1714 perf_report_prologue(s->code_gen_ptr, prologue_size); 1715 1716 #ifndef CONFIG_TCG_INTERPRETER 1717 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1718 (uintptr_t)s->code_buf, prologue_size); 1719 #endif 1720 1721 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1722 FILE *logfile = qemu_log_trylock(); 1723 if (logfile) { 1724 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1725 if (s->data_gen_ptr) { 1726 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1727 size_t data_size = prologue_size - code_size; 1728 size_t i; 1729 1730 disas(logfile, s->code_gen_ptr, code_size); 1731 1732 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1733 if (sizeof(tcg_target_ulong) == 8) { 1734 fprintf(logfile, 1735 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1736 (uintptr_t)s->data_gen_ptr + i, 1737 *(uint64_t *)(s->data_gen_ptr + i)); 1738 } else { 1739 fprintf(logfile, 1740 "0x%08" PRIxPTR ": .long 0x%08x\n", 1741 (uintptr_t)s->data_gen_ptr + i, 1742 *(uint32_t *)(s->data_gen_ptr + i)); 1743 } 1744 } 1745 } else { 1746 disas(logfile, s->code_gen_ptr, prologue_size); 1747 } 1748 fprintf(logfile, "\n"); 1749 qemu_log_unlock(logfile); 1750 } 1751 } 1752 1753 #ifndef CONFIG_TCG_INTERPRETER 1754 /* 1755 * Assert that goto_ptr is implemented completely, setting an epilogue. 1756 * For tci, we use NULL as the signal to return from the interpreter, 1757 * so skip this check. 1758 */ 1759 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1760 #endif 1761 1762 tcg_region_prologue_set(s); 1763 } 1764 1765 void tcg_func_start(TCGContext *s) 1766 { 1767 tcg_pool_reset(s); 1768 s->nb_temps = s->nb_globals; 1769 1770 /* No temps have been previously allocated for size or locality. */ 1771 tcg_temp_ebb_reset_freed(s); 1772 1773 /* No constant temps have been previously allocated. 
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
2 : 1); 1863 indirect_reg = 1; 1864 break; 1865 default: 1866 g_assert_not_reached(); 1867 } 1868 1869 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1870 TCGTemp *ts2 = tcg_global_alloc(s); 1871 char buf[64]; 1872 1873 ts->base_type = TCG_TYPE_I64; 1874 ts->type = TCG_TYPE_I32; 1875 ts->indirect_reg = indirect_reg; 1876 ts->mem_allocated = 1; 1877 ts->mem_base = base_ts; 1878 ts->mem_offset = offset; 1879 pstrcpy(buf, sizeof(buf), name); 1880 pstrcat(buf, sizeof(buf), "_0"); 1881 ts->name = strdup(buf); 1882 1883 tcg_debug_assert(ts2 == ts + 1); 1884 ts2->base_type = TCG_TYPE_I64; 1885 ts2->type = TCG_TYPE_I32; 1886 ts2->indirect_reg = indirect_reg; 1887 ts2->mem_allocated = 1; 1888 ts2->mem_base = base_ts; 1889 ts2->mem_offset = offset + 4; 1890 ts2->temp_subindex = 1; 1891 pstrcpy(buf, sizeof(buf), name); 1892 pstrcat(buf, sizeof(buf), "_1"); 1893 ts2->name = strdup(buf); 1894 } else { 1895 ts->base_type = type; 1896 ts->type = type; 1897 ts->indirect_reg = indirect_reg; 1898 ts->mem_allocated = 1; 1899 ts->mem_base = base_ts; 1900 ts->mem_offset = offset; 1901 ts->name = name; 1902 } 1903 return ts; 1904 } 1905 1906 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1907 { 1908 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1909 return temp_tcgv_i32(ts); 1910 } 1911 1912 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1913 { 1914 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1915 return temp_tcgv_i64(ts); 1916 } 1917 1918 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1919 { 1920 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1921 return temp_tcgv_ptr(ts); 1922 } 1923 1924 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1925 { 1926 TCGContext *s = tcg_ctx; 1927 TCGTemp *ts; 1928 int n; 1929 1930 if (kind == TEMP_EBB) { 1931 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1932 1933 if (idx < TCG_MAX_TEMPS) { 1934 /* There is already an available temp with the right type. 
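       The free_temps[type] bitmap is indexed by temp_idx() and populated by
       tcg_temp_free_internal() when an EBB temp is released, so freed EBB
       temps are recycled here instead of growing nb_temps.  Illustrative
       sequence (not from this file):
           TCGv_i32 t = tcg_temp_ebb_new_i32();
           ... use t ...
           tcg_temp_free_i32(t);                 -- sets the bit for t's slot
           TCGv_i32 u = tcg_temp_ebb_new_i32();  -- may reuse that same slot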
*/ 1935 clear_bit(idx, s->free_temps[type].l); 1936 1937 ts = &s->temps[idx]; 1938 ts->temp_allocated = 1; 1939 tcg_debug_assert(ts->base_type == type); 1940 tcg_debug_assert(ts->kind == kind); 1941 return ts; 1942 } 1943 } else { 1944 tcg_debug_assert(kind == TEMP_TB); 1945 } 1946 1947 switch (type) { 1948 case TCG_TYPE_I32: 1949 case TCG_TYPE_V64: 1950 case TCG_TYPE_V128: 1951 case TCG_TYPE_V256: 1952 n = 1; 1953 break; 1954 case TCG_TYPE_I64: 1955 n = 64 / TCG_TARGET_REG_BITS; 1956 break; 1957 case TCG_TYPE_I128: 1958 n = 128 / TCG_TARGET_REG_BITS; 1959 break; 1960 default: 1961 g_assert_not_reached(); 1962 } 1963 1964 ts = tcg_temp_alloc(s); 1965 ts->base_type = type; 1966 ts->temp_allocated = 1; 1967 ts->kind = kind; 1968 1969 if (n == 1) { 1970 ts->type = type; 1971 } else { 1972 ts->type = TCG_TYPE_REG; 1973 1974 for (int i = 1; i < n; ++i) { 1975 TCGTemp *ts2 = tcg_temp_alloc(s); 1976 1977 tcg_debug_assert(ts2 == ts + i); 1978 ts2->base_type = type; 1979 ts2->type = TCG_TYPE_REG; 1980 ts2->temp_allocated = 1; 1981 ts2->temp_subindex = i; 1982 ts2->kind = kind; 1983 } 1984 } 1985 return ts; 1986 } 1987 1988 TCGv_i32 tcg_temp_new_i32(void) 1989 { 1990 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1991 } 1992 1993 TCGv_i32 tcg_temp_ebb_new_i32(void) 1994 { 1995 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1996 } 1997 1998 TCGv_i64 tcg_temp_new_i64(void) 1999 { 2000 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 2001 } 2002 2003 TCGv_i64 tcg_temp_ebb_new_i64(void) 2004 { 2005 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 2006 } 2007 2008 TCGv_ptr tcg_temp_new_ptr(void) 2009 { 2010 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2011 } 2012 2013 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2014 { 2015 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2016 } 2017 2018 TCGv_i128 tcg_temp_new_i128(void) 2019 { 2020 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2021 } 2022 2023 TCGv_i128 tcg_temp_ebb_new_i128(void) 2024 { 2025 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2026 } 2027 2028 TCGv_vec tcg_temp_new_vec(TCGType type) 2029 { 2030 TCGTemp *t; 2031 2032 #ifdef CONFIG_DEBUG_TCG 2033 switch (type) { 2034 case TCG_TYPE_V64: 2035 assert(TCG_TARGET_HAS_v64); 2036 break; 2037 case TCG_TYPE_V128: 2038 assert(TCG_TARGET_HAS_v128); 2039 break; 2040 case TCG_TYPE_V256: 2041 assert(TCG_TARGET_HAS_v256); 2042 break; 2043 default: 2044 g_assert_not_reached(); 2045 } 2046 #endif 2047 2048 t = tcg_temp_new_internal(type, TEMP_EBB); 2049 return temp_tcgv_vec(t); 2050 } 2051 2052 /* Create a new temp of the same type as an existing temp. */ 2053 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2054 { 2055 TCGTemp *t = tcgv_vec_temp(match); 2056 2057 tcg_debug_assert(t->temp_allocated != 0); 2058 2059 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2060 return temp_tcgv_vec(t); 2061 } 2062 2063 void tcg_temp_free_internal(TCGTemp *ts) 2064 { 2065 TCGContext *s = tcg_ctx; 2066 2067 switch (ts->kind) { 2068 case TEMP_CONST: 2069 case TEMP_TB: 2070 /* Silently ignore free. */ 2071 break; 2072 case TEMP_EBB: 2073 tcg_debug_assert(ts->temp_allocated != 0); 2074 ts->temp_allocated = 0; 2075 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2076 break; 2077 default: 2078 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
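       Fixed and global temps are created once, via tcg_global_reg_new_internal()
       or tcg_global_mem_new_internal(), and live for the lifetime of the
       TCGContext; a request to free one indicates a bug in the caller.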
*/ 2079 g_assert_not_reached(); 2080 } 2081 } 2082 2083 void tcg_temp_free_i32(TCGv_i32 arg) 2084 { 2085 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2086 } 2087 2088 void tcg_temp_free_i64(TCGv_i64 arg) 2089 { 2090 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2091 } 2092 2093 void tcg_temp_free_i128(TCGv_i128 arg) 2094 { 2095 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2096 } 2097 2098 void tcg_temp_free_ptr(TCGv_ptr arg) 2099 { 2100 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2101 } 2102 2103 void tcg_temp_free_vec(TCGv_vec arg) 2104 { 2105 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2106 } 2107 2108 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2109 { 2110 TCGContext *s = tcg_ctx; 2111 GHashTable *h = s->const_table[type]; 2112 TCGTemp *ts; 2113 2114 if (h == NULL) { 2115 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2116 s->const_table[type] = h; 2117 } 2118 2119 ts = g_hash_table_lookup(h, &val); 2120 if (ts == NULL) { 2121 int64_t *val_ptr; 2122 2123 ts = tcg_temp_alloc(s); 2124 2125 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2126 TCGTemp *ts2 = tcg_temp_alloc(s); 2127 2128 tcg_debug_assert(ts2 == ts + 1); 2129 2130 ts->base_type = TCG_TYPE_I64; 2131 ts->type = TCG_TYPE_I32; 2132 ts->kind = TEMP_CONST; 2133 ts->temp_allocated = 1; 2134 2135 ts2->base_type = TCG_TYPE_I64; 2136 ts2->type = TCG_TYPE_I32; 2137 ts2->kind = TEMP_CONST; 2138 ts2->temp_allocated = 1; 2139 ts2->temp_subindex = 1; 2140 2141 /* 2142 * Retain the full value of the 64-bit constant in the low 2143 * part, so that the hash table works. Actual uses will 2144 * truncate the value to the low part. 2145 */ 2146 ts[HOST_BIG_ENDIAN].val = val; 2147 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2148 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2149 } else { 2150 ts->base_type = type; 2151 ts->type = type; 2152 ts->kind = TEMP_CONST; 2153 ts->temp_allocated = 1; 2154 ts->val = val; 2155 val_ptr = &ts->val; 2156 } 2157 g_hash_table_insert(h, val_ptr, ts); 2158 } 2159 2160 return ts; 2161 } 2162 2163 TCGv_i32 tcg_constant_i32(int32_t val) 2164 { 2165 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2166 } 2167 2168 TCGv_i64 tcg_constant_i64(int64_t val) 2169 { 2170 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2171 } 2172 2173 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2174 { 2175 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2176 } 2177 2178 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2179 { 2180 val = dup_const(vece, val); 2181 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2182 } 2183 2184 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2185 { 2186 TCGTemp *t = tcgv_vec_temp(match); 2187 2188 tcg_debug_assert(t->temp_allocated != 0); 2189 return tcg_constant_vec(t->base_type, vece, val); 2190 } 2191 2192 #ifdef CONFIG_DEBUG_TCG 2193 size_t temp_idx(TCGTemp *ts) 2194 { 2195 ptrdiff_t n = ts - tcg_ctx->temps; 2196 assert(n >= 0 && n < tcg_ctx->nb_temps); 2197 return n; 2198 } 2199 2200 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2201 { 2202 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2203 2204 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2205 assert(o % sizeof(TCGTemp) == 0); 2206 2207 return (void *)tcg_ctx + (uintptr_t)v; 2208 } 2209 #endif /* CONFIG_DEBUG_TCG */ 2210 2211 /* 2212 * Return true if OP may appear in the opcode stream with TYPE. 2213 * Test the runtime variable that controls each opcode. 
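 *
 * Illustrative use (not a call made in this file): a caller can guard an
 * optional opcode and fall back to a generic expansion otherwise:
 *
 *     if (tcg_op_supported(INDEX_op_rotli_vec, type, 0)) {
 *         ... emit the rotate directly ...
 *     } else {
 *         ... expand via shli_vec, shri_vec and or_vec ...
 *     }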
2214 */ 2215 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2216 { 2217 bool has_type; 2218 2219 switch (type) { 2220 case TCG_TYPE_I32: 2221 has_type = true; 2222 break; 2223 case TCG_TYPE_I64: 2224 has_type = TCG_TARGET_REG_BITS == 64; 2225 break; 2226 case TCG_TYPE_V64: 2227 has_type = TCG_TARGET_HAS_v64; 2228 break; 2229 case TCG_TYPE_V128: 2230 has_type = TCG_TARGET_HAS_v128; 2231 break; 2232 case TCG_TYPE_V256: 2233 has_type = TCG_TARGET_HAS_v256; 2234 break; 2235 default: 2236 has_type = false; 2237 break; 2238 } 2239 2240 switch (op) { 2241 case INDEX_op_discard: 2242 case INDEX_op_set_label: 2243 case INDEX_op_call: 2244 case INDEX_op_br: 2245 case INDEX_op_mb: 2246 case INDEX_op_insn_start: 2247 case INDEX_op_exit_tb: 2248 case INDEX_op_goto_tb: 2249 case INDEX_op_goto_ptr: 2250 case INDEX_op_qemu_ld_i32: 2251 case INDEX_op_qemu_st_i32: 2252 case INDEX_op_qemu_ld_i64: 2253 case INDEX_op_qemu_st_i64: 2254 return true; 2255 2256 case INDEX_op_qemu_st8_i32: 2257 return TCG_TARGET_HAS_qemu_st8_i32; 2258 2259 case INDEX_op_qemu_ld_i128: 2260 case INDEX_op_qemu_st_i128: 2261 return TCG_TARGET_HAS_qemu_ldst_i128; 2262 2263 case INDEX_op_add: 2264 case INDEX_op_and: 2265 case INDEX_op_mov: 2266 case INDEX_op_or: 2267 case INDEX_op_xor: 2268 return has_type; 2269 2270 case INDEX_op_setcond_i32: 2271 case INDEX_op_brcond_i32: 2272 case INDEX_op_movcond_i32: 2273 case INDEX_op_ld8u_i32: 2274 case INDEX_op_ld8s_i32: 2275 case INDEX_op_ld16u_i32: 2276 case INDEX_op_ld16s_i32: 2277 case INDEX_op_ld_i32: 2278 case INDEX_op_st8_i32: 2279 case INDEX_op_st16_i32: 2280 case INDEX_op_st_i32: 2281 case INDEX_op_extract_i32: 2282 case INDEX_op_sextract_i32: 2283 case INDEX_op_deposit_i32: 2284 return true; 2285 2286 case INDEX_op_negsetcond_i32: 2287 return TCG_TARGET_HAS_negsetcond_i32; 2288 case INDEX_op_extract2_i32: 2289 return TCG_TARGET_HAS_extract2_i32; 2290 case INDEX_op_add2_i32: 2291 return TCG_TARGET_HAS_add2_i32; 2292 case INDEX_op_sub2_i32: 2293 return TCG_TARGET_HAS_sub2_i32; 2294 case INDEX_op_bswap16_i32: 2295 return TCG_TARGET_HAS_bswap16_i32; 2296 case INDEX_op_bswap32_i32: 2297 return TCG_TARGET_HAS_bswap32_i32; 2298 2299 case INDEX_op_brcond2_i32: 2300 case INDEX_op_setcond2_i32: 2301 return TCG_TARGET_REG_BITS == 32; 2302 2303 case INDEX_op_setcond_i64: 2304 case INDEX_op_brcond_i64: 2305 case INDEX_op_movcond_i64: 2306 case INDEX_op_ld8u_i64: 2307 case INDEX_op_ld8s_i64: 2308 case INDEX_op_ld16u_i64: 2309 case INDEX_op_ld16s_i64: 2310 case INDEX_op_ld32u_i64: 2311 case INDEX_op_ld32s_i64: 2312 case INDEX_op_ld_i64: 2313 case INDEX_op_st8_i64: 2314 case INDEX_op_st16_i64: 2315 case INDEX_op_st32_i64: 2316 case INDEX_op_st_i64: 2317 case INDEX_op_ext_i32_i64: 2318 case INDEX_op_extu_i32_i64: 2319 case INDEX_op_extract_i64: 2320 case INDEX_op_sextract_i64: 2321 case INDEX_op_deposit_i64: 2322 return TCG_TARGET_REG_BITS == 64; 2323 2324 case INDEX_op_negsetcond_i64: 2325 return TCG_TARGET_HAS_negsetcond_i64; 2326 case INDEX_op_extract2_i64: 2327 return TCG_TARGET_HAS_extract2_i64; 2328 case INDEX_op_extrl_i64_i32: 2329 case INDEX_op_extrh_i64_i32: 2330 return TCG_TARGET_HAS_extr_i64_i32; 2331 case INDEX_op_bswap16_i64: 2332 return TCG_TARGET_HAS_bswap16_i64; 2333 case INDEX_op_bswap32_i64: 2334 return TCG_TARGET_HAS_bswap32_i64; 2335 case INDEX_op_bswap64_i64: 2336 return TCG_TARGET_HAS_bswap64_i64; 2337 case INDEX_op_add2_i64: 2338 return TCG_TARGET_HAS_add2_i64; 2339 case INDEX_op_sub2_i64: 2340 return TCG_TARGET_HAS_sub2_i64; 2341 2342 case INDEX_op_mov_vec: 
2343 case INDEX_op_dup_vec: 2344 case INDEX_op_dupm_vec: 2345 case INDEX_op_ld_vec: 2346 case INDEX_op_st_vec: 2347 case INDEX_op_add_vec: 2348 case INDEX_op_sub_vec: 2349 case INDEX_op_and_vec: 2350 case INDEX_op_or_vec: 2351 case INDEX_op_xor_vec: 2352 case INDEX_op_cmp_vec: 2353 return has_type; 2354 case INDEX_op_dup2_vec: 2355 return has_type && TCG_TARGET_REG_BITS == 32; 2356 case INDEX_op_not_vec: 2357 return has_type && TCG_TARGET_HAS_not_vec; 2358 case INDEX_op_neg_vec: 2359 return has_type && TCG_TARGET_HAS_neg_vec; 2360 case INDEX_op_abs_vec: 2361 return has_type && TCG_TARGET_HAS_abs_vec; 2362 case INDEX_op_andc_vec: 2363 return has_type && TCG_TARGET_HAS_andc_vec; 2364 case INDEX_op_orc_vec: 2365 return has_type && TCG_TARGET_HAS_orc_vec; 2366 case INDEX_op_nand_vec: 2367 return has_type && TCG_TARGET_HAS_nand_vec; 2368 case INDEX_op_nor_vec: 2369 return has_type && TCG_TARGET_HAS_nor_vec; 2370 case INDEX_op_eqv_vec: 2371 return has_type && TCG_TARGET_HAS_eqv_vec; 2372 case INDEX_op_mul_vec: 2373 return has_type && TCG_TARGET_HAS_mul_vec; 2374 case INDEX_op_shli_vec: 2375 case INDEX_op_shri_vec: 2376 case INDEX_op_sari_vec: 2377 return has_type && TCG_TARGET_HAS_shi_vec; 2378 case INDEX_op_shls_vec: 2379 case INDEX_op_shrs_vec: 2380 case INDEX_op_sars_vec: 2381 return has_type && TCG_TARGET_HAS_shs_vec; 2382 case INDEX_op_shlv_vec: 2383 case INDEX_op_shrv_vec: 2384 case INDEX_op_sarv_vec: 2385 return has_type && TCG_TARGET_HAS_shv_vec; 2386 case INDEX_op_rotli_vec: 2387 return has_type && TCG_TARGET_HAS_roti_vec; 2388 case INDEX_op_rotls_vec: 2389 return has_type && TCG_TARGET_HAS_rots_vec; 2390 case INDEX_op_rotlv_vec: 2391 case INDEX_op_rotrv_vec: 2392 return has_type && TCG_TARGET_HAS_rotv_vec; 2393 case INDEX_op_ssadd_vec: 2394 case INDEX_op_usadd_vec: 2395 case INDEX_op_sssub_vec: 2396 case INDEX_op_ussub_vec: 2397 return has_type && TCG_TARGET_HAS_sat_vec; 2398 case INDEX_op_smin_vec: 2399 case INDEX_op_umin_vec: 2400 case INDEX_op_smax_vec: 2401 case INDEX_op_umax_vec: 2402 return has_type && TCG_TARGET_HAS_minmax_vec; 2403 case INDEX_op_bitsel_vec: 2404 return has_type && TCG_TARGET_HAS_bitsel_vec; 2405 case INDEX_op_cmpsel_vec: 2406 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2407 2408 default: 2409 if (op < INDEX_op_last_generic) { 2410 const TCGOutOp *outop; 2411 TCGConstraintSetIndex con_set; 2412 2413 if (!has_type) { 2414 return false; 2415 } 2416 2417 outop = all_outop[op]; 2418 tcg_debug_assert(outop != NULL); 2419 2420 con_set = outop->static_constraint; 2421 if (con_set == C_Dynamic) { 2422 con_set = outop->dynamic_constraint(type, flags); 2423 } 2424 if (con_set >= 0) { 2425 return true; 2426 } 2427 tcg_debug_assert(con_set == C_NotImplemented); 2428 return false; 2429 } 2430 tcg_debug_assert(op < NB_OPS); 2431 return true; 2432 2433 case INDEX_op_last_generic: 2434 g_assert_not_reached(); 2435 } 2436 } 2437 2438 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2439 { 2440 unsigned width; 2441 2442 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2443 width = (type == TCG_TYPE_I32 ? 
32 : 64); 2444 2445 tcg_debug_assert(ofs < width); 2446 tcg_debug_assert(len > 0); 2447 tcg_debug_assert(len <= width - ofs); 2448 2449 return TCG_TARGET_deposit_valid(type, ofs, len); 2450 } 2451 2452 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2453 2454 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2455 TCGTemp *ret, TCGTemp **args) 2456 { 2457 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2458 int n_extend = 0; 2459 TCGOp *op; 2460 int i, n, pi = 0, total_args; 2461 2462 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2463 init_call_layout(info); 2464 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2465 } 2466 2467 total_args = info->nr_out + info->nr_in + 2; 2468 op = tcg_op_alloc(INDEX_op_call, total_args); 2469 2470 #ifdef CONFIG_PLUGIN 2471 /* Flag helpers that may affect guest state */ 2472 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2473 tcg_ctx->plugin_insn->calls_helpers = true; 2474 } 2475 #endif 2476 2477 TCGOP_CALLO(op) = n = info->nr_out; 2478 switch (n) { 2479 case 0: 2480 tcg_debug_assert(ret == NULL); 2481 break; 2482 case 1: 2483 tcg_debug_assert(ret != NULL); 2484 op->args[pi++] = temp_arg(ret); 2485 break; 2486 case 2: 2487 case 4: 2488 tcg_debug_assert(ret != NULL); 2489 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2490 tcg_debug_assert(ret->temp_subindex == 0); 2491 for (i = 0; i < n; ++i) { 2492 op->args[pi++] = temp_arg(ret + i); 2493 } 2494 break; 2495 default: 2496 g_assert_not_reached(); 2497 } 2498 2499 TCGOP_CALLI(op) = n = info->nr_in; 2500 for (i = 0; i < n; i++) { 2501 const TCGCallArgumentLoc *loc = &info->in[i]; 2502 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2503 2504 switch (loc->kind) { 2505 case TCG_CALL_ARG_NORMAL: 2506 case TCG_CALL_ARG_BY_REF: 2507 case TCG_CALL_ARG_BY_REF_N: 2508 op->args[pi++] = temp_arg(ts); 2509 break; 2510 2511 case TCG_CALL_ARG_EXTEND_U: 2512 case TCG_CALL_ARG_EXTEND_S: 2513 { 2514 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2515 TCGv_i32 orig = temp_tcgv_i32(ts); 2516 2517 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2518 tcg_gen_ext_i32_i64(temp, orig); 2519 } else { 2520 tcg_gen_extu_i32_i64(temp, orig); 2521 } 2522 op->args[pi++] = tcgv_i64_arg(temp); 2523 extend_free[n_extend++] = temp; 2524 } 2525 break; 2526 2527 default: 2528 g_assert_not_reached(); 2529 } 2530 } 2531 op->args[pi++] = (uintptr_t)func; 2532 op->args[pi++] = (uintptr_t)info; 2533 tcg_debug_assert(pi == total_args); 2534 2535 if (tcg_ctx->emit_before_op) { 2536 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2537 } else { 2538 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2539 } 2540 2541 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2542 for (i = 0; i < n_extend; ++i) { 2543 tcg_temp_free_i64(extend_free[i]); 2544 } 2545 } 2546 2547 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2548 { 2549 tcg_gen_callN(func, info, ret, NULL); 2550 } 2551 2552 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2553 { 2554 tcg_gen_callN(func, info, ret, &t1); 2555 } 2556 2557 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2558 TCGTemp *t1, TCGTemp *t2) 2559 { 2560 TCGTemp *args[2] = { t1, t2 }; 2561 tcg_gen_callN(func, info, ret, args); 2562 } 2563 2564 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2565 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2566 { 2567 TCGTemp *args[3] = { t1, t2, t3 }; 2568 tcg_gen_callN(func, info, ret, args); 2569 } 2570 2571 void tcg_gen_call4(void 
*func, TCGHelperInfo *info, TCGTemp *ret, 2572 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2573 { 2574 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2575 tcg_gen_callN(func, info, ret, args); 2576 } 2577 2578 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2579 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2580 { 2581 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2582 tcg_gen_callN(func, info, ret, args); 2583 } 2584 2585 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2586 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2587 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2588 { 2589 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2590 tcg_gen_callN(func, info, ret, args); 2591 } 2592 2593 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2594 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2595 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2596 { 2597 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2598 tcg_gen_callN(func, info, ret, args); 2599 } 2600 2601 static void tcg_reg_alloc_start(TCGContext *s) 2602 { 2603 int i, n; 2604 2605 for (i = 0, n = s->nb_temps; i < n; i++) { 2606 TCGTemp *ts = &s->temps[i]; 2607 TCGTempVal val = TEMP_VAL_MEM; 2608 2609 switch (ts->kind) { 2610 case TEMP_CONST: 2611 val = TEMP_VAL_CONST; 2612 break; 2613 case TEMP_FIXED: 2614 val = TEMP_VAL_REG; 2615 break; 2616 case TEMP_GLOBAL: 2617 break; 2618 case TEMP_EBB: 2619 val = TEMP_VAL_DEAD; 2620 /* fall through */ 2621 case TEMP_TB: 2622 ts->mem_allocated = 0; 2623 break; 2624 default: 2625 g_assert_not_reached(); 2626 } 2627 ts->val_type = val; 2628 } 2629 2630 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2631 } 2632 2633 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2634 TCGTemp *ts) 2635 { 2636 int idx = temp_idx(ts); 2637 2638 switch (ts->kind) { 2639 case TEMP_FIXED: 2640 case TEMP_GLOBAL: 2641 pstrcpy(buf, buf_size, ts->name); 2642 break; 2643 case TEMP_TB: 2644 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2645 break; 2646 case TEMP_EBB: 2647 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2648 break; 2649 case TEMP_CONST: 2650 switch (ts->type) { 2651 case TCG_TYPE_I32: 2652 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2653 break; 2654 #if TCG_TARGET_REG_BITS > 32 2655 case TCG_TYPE_I64: 2656 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2657 break; 2658 #endif 2659 case TCG_TYPE_V64: 2660 case TCG_TYPE_V128: 2661 case TCG_TYPE_V256: 2662 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2663 64 << (ts->type - TCG_TYPE_V64), ts->val); 2664 break; 2665 default: 2666 g_assert_not_reached(); 2667 } 2668 break; 2669 } 2670 return buf; 2671 } 2672 2673 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2674 int buf_size, TCGArg arg) 2675 { 2676 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2677 } 2678 2679 static const char * const cond_name[] = 2680 { 2681 [TCG_COND_NEVER] = "never", 2682 [TCG_COND_ALWAYS] = "always", 2683 [TCG_COND_EQ] = "eq", 2684 [TCG_COND_NE] = "ne", 2685 [TCG_COND_LT] = "lt", 2686 [TCG_COND_GE] = "ge", 2687 [TCG_COND_LE] = "le", 2688 [TCG_COND_GT] = "gt", 2689 [TCG_COND_LTU] = "ltu", 2690 [TCG_COND_GEU] = "geu", 2691 [TCG_COND_LEU] = "leu", 2692 [TCG_COND_GTU] = "gtu", 2693 [TCG_COND_TSTEQ] = "tsteq", 2694 [TCG_COND_TSTNE] = "tstne", 2695 }; 2696 2697 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2698 { 2699 [MO_UB] = "ub", 2700 [MO_SB] = "sb", 2701 [MO_LEUW] = "leuw", 2702 [MO_LESW] = "lesw", 2703 [MO_LEUL] = "leul", 2704 [MO_LESL] = "lesl", 
2705 [MO_LEUQ] = "leq", 2706 [MO_BEUW] = "beuw", 2707 [MO_BESW] = "besw", 2708 [MO_BEUL] = "beul", 2709 [MO_BESL] = "besl", 2710 [MO_BEUQ] = "beq", 2711 [MO_128 + MO_BE] = "beo", 2712 [MO_128 + MO_LE] = "leo", 2713 }; 2714 2715 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2716 [MO_UNALN >> MO_ASHIFT] = "un+", 2717 [MO_ALIGN >> MO_ASHIFT] = "al+", 2718 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2719 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2720 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2721 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2722 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2723 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2724 }; 2725 2726 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2727 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2728 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2729 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2730 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2731 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2732 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2733 }; 2734 2735 static const char bswap_flag_name[][6] = { 2736 [TCG_BSWAP_IZ] = "iz", 2737 [TCG_BSWAP_OZ] = "oz", 2738 [TCG_BSWAP_OS] = "os", 2739 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2740 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2741 }; 2742 2743 #ifdef CONFIG_PLUGIN 2744 static const char * const plugin_from_name[] = { 2745 "from-tb", 2746 "from-insn", 2747 "after-insn", 2748 "after-tb", 2749 }; 2750 #endif 2751 2752 static inline bool tcg_regset_single(TCGRegSet d) 2753 { 2754 return (d & (d - 1)) == 0; 2755 } 2756 2757 static inline TCGReg tcg_regset_first(TCGRegSet d) 2758 { 2759 if (TCG_TARGET_NB_REGS <= 32) { 2760 return ctz32(d); 2761 } else { 2762 return ctz64(d); 2763 } 2764 } 2765 2766 /* Return only the number of characters output -- no error return. */ 2767 #define ne_fprintf(...) \ 2768 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2769 2770 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2771 { 2772 char buf[128]; 2773 TCGOp *op; 2774 2775 QTAILQ_FOREACH(op, &s->ops, link) { 2776 int i, k, nb_oargs, nb_iargs, nb_cargs; 2777 const TCGOpDef *def; 2778 TCGOpcode c; 2779 int col = 0; 2780 2781 c = op->opc; 2782 def = &tcg_op_defs[c]; 2783 2784 if (c == INDEX_op_insn_start) { 2785 nb_oargs = 0; 2786 col += ne_fprintf(f, "\n ----"); 2787 2788 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2789 col += ne_fprintf(f, " %016" PRIx64, 2790 tcg_get_insn_start_param(op, i)); 2791 } 2792 } else if (c == INDEX_op_call) { 2793 const TCGHelperInfo *info = tcg_call_info(op); 2794 void *func = tcg_call_func(op); 2795 2796 /* variable number of arguments */ 2797 nb_oargs = TCGOP_CALLO(op); 2798 nb_iargs = TCGOP_CALLI(op); 2799 nb_cargs = def->nb_cargs; 2800 2801 col += ne_fprintf(f, " %s ", def->name); 2802 2803 /* 2804 * Print the function name from TCGHelperInfo, if available. 2805 * Note that plugins have a template function for the info, 2806 * but the actual function pointer comes from the plugin. 
2807 */ 2808 if (func == info->func) { 2809 col += ne_fprintf(f, "%s", info->name); 2810 } else { 2811 col += ne_fprintf(f, "plugin(%p)", func); 2812 } 2813 2814 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2815 for (i = 0; i < nb_oargs; i++) { 2816 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2817 op->args[i])); 2818 } 2819 for (i = 0; i < nb_iargs; i++) { 2820 TCGArg arg = op->args[nb_oargs + i]; 2821 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2822 col += ne_fprintf(f, ",%s", t); 2823 } 2824 } else { 2825 if (def->flags & TCG_OPF_INT) { 2826 col += ne_fprintf(f, " %s_i%d ", 2827 def->name, 2828 8 * tcg_type_size(TCGOP_TYPE(op))); 2829 } else if (def->flags & TCG_OPF_VECTOR) { 2830 col += ne_fprintf(f, "%s v%d,e%d,", 2831 def->name, 2832 8 * tcg_type_size(TCGOP_TYPE(op)), 2833 8 << TCGOP_VECE(op)); 2834 } else { 2835 col += ne_fprintf(f, " %s ", def->name); 2836 } 2837 2838 nb_oargs = def->nb_oargs; 2839 nb_iargs = def->nb_iargs; 2840 nb_cargs = def->nb_cargs; 2841 2842 k = 0; 2843 for (i = 0; i < nb_oargs; i++) { 2844 const char *sep = k ? "," : ""; 2845 col += ne_fprintf(f, "%s%s", sep, 2846 tcg_get_arg_str(s, buf, sizeof(buf), 2847 op->args[k++])); 2848 } 2849 for (i = 0; i < nb_iargs; i++) { 2850 const char *sep = k ? "," : ""; 2851 col += ne_fprintf(f, "%s%s", sep, 2852 tcg_get_arg_str(s, buf, sizeof(buf), 2853 op->args[k++])); 2854 } 2855 switch (c) { 2856 case INDEX_op_brcond_i32: 2857 case INDEX_op_setcond_i32: 2858 case INDEX_op_negsetcond_i32: 2859 case INDEX_op_movcond_i32: 2860 case INDEX_op_brcond2_i32: 2861 case INDEX_op_setcond2_i32: 2862 case INDEX_op_brcond_i64: 2863 case INDEX_op_setcond_i64: 2864 case INDEX_op_negsetcond_i64: 2865 case INDEX_op_movcond_i64: 2866 case INDEX_op_cmp_vec: 2867 case INDEX_op_cmpsel_vec: 2868 if (op->args[k] < ARRAY_SIZE(cond_name) 2869 && cond_name[op->args[k]]) { 2870 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2871 } else { 2872 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2873 } 2874 i = 1; 2875 break; 2876 case INDEX_op_qemu_ld_i32: 2877 case INDEX_op_qemu_st_i32: 2878 case INDEX_op_qemu_st8_i32: 2879 case INDEX_op_qemu_ld_i64: 2880 case INDEX_op_qemu_st_i64: 2881 case INDEX_op_qemu_ld_i128: 2882 case INDEX_op_qemu_st_i128: 2883 { 2884 const char *s_al, *s_op, *s_at; 2885 MemOpIdx oi = op->args[k++]; 2886 MemOp mop = get_memop(oi); 2887 unsigned ix = get_mmuidx(oi); 2888 2889 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2890 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2891 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2892 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2893 2894 /* If all fields are accounted for, print symbolically. 
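       For example (illustrative), an aligned little-endian 32-bit load with
       default atomicity and mmu index 1 prints as ",al+leul,1"; any bits
       left in mop after the masking above force the raw hex fallback below.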
*/ 2895 if (!mop && s_al && s_op && s_at) { 2896 col += ne_fprintf(f, ",%s%s%s,%u", 2897 s_at, s_al, s_op, ix); 2898 } else { 2899 mop = get_memop(oi); 2900 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2901 } 2902 i = 1; 2903 } 2904 break; 2905 case INDEX_op_bswap16_i32: 2906 case INDEX_op_bswap16_i64: 2907 case INDEX_op_bswap32_i32: 2908 case INDEX_op_bswap32_i64: 2909 case INDEX_op_bswap64_i64: 2910 { 2911 TCGArg flags = op->args[k]; 2912 const char *name = NULL; 2913 2914 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2915 name = bswap_flag_name[flags]; 2916 } 2917 if (name) { 2918 col += ne_fprintf(f, ",%s", name); 2919 } else { 2920 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2921 } 2922 i = k = 1; 2923 } 2924 break; 2925 #ifdef CONFIG_PLUGIN 2926 case INDEX_op_plugin_cb: 2927 { 2928 TCGArg from = op->args[k++]; 2929 const char *name = NULL; 2930 2931 if (from < ARRAY_SIZE(plugin_from_name)) { 2932 name = plugin_from_name[from]; 2933 } 2934 if (name) { 2935 col += ne_fprintf(f, "%s", name); 2936 } else { 2937 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2938 } 2939 i = 1; 2940 } 2941 break; 2942 #endif 2943 default: 2944 i = 0; 2945 break; 2946 } 2947 switch (c) { 2948 case INDEX_op_set_label: 2949 case INDEX_op_br: 2950 case INDEX_op_brcond_i32: 2951 case INDEX_op_brcond_i64: 2952 case INDEX_op_brcond2_i32: 2953 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2954 arg_label(op->args[k])->id); 2955 i++, k++; 2956 break; 2957 case INDEX_op_mb: 2958 { 2959 TCGBar membar = op->args[k]; 2960 const char *b_op, *m_op; 2961 2962 switch (membar & TCG_BAR_SC) { 2963 case 0: 2964 b_op = "none"; 2965 break; 2966 case TCG_BAR_LDAQ: 2967 b_op = "acq"; 2968 break; 2969 case TCG_BAR_STRL: 2970 b_op = "rel"; 2971 break; 2972 case TCG_BAR_SC: 2973 b_op = "seq"; 2974 break; 2975 default: 2976 g_assert_not_reached(); 2977 } 2978 2979 switch (membar & TCG_MO_ALL) { 2980 case 0: 2981 m_op = "none"; 2982 break; 2983 case TCG_MO_LD_LD: 2984 m_op = "rr"; 2985 break; 2986 case TCG_MO_LD_ST: 2987 m_op = "rw"; 2988 break; 2989 case TCG_MO_ST_LD: 2990 m_op = "wr"; 2991 break; 2992 case TCG_MO_ST_ST: 2993 m_op = "ww"; 2994 break; 2995 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2996 m_op = "rr+rw"; 2997 break; 2998 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2999 m_op = "rr+wr"; 3000 break; 3001 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3002 m_op = "rr+ww"; 3003 break; 3004 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3005 m_op = "rw+wr"; 3006 break; 3007 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3008 m_op = "rw+ww"; 3009 break; 3010 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3011 m_op = "wr+ww"; 3012 break; 3013 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3014 m_op = "rr+rw+wr"; 3015 break; 3016 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3017 m_op = "rr+rw+ww"; 3018 break; 3019 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3020 m_op = "rr+wr+ww"; 3021 break; 3022 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3023 m_op = "rw+wr+ww"; 3024 break; 3025 case TCG_MO_ALL: 3026 m_op = "all"; 3027 break; 3028 default: 3029 g_assert_not_reached(); 3030 } 3031 3032 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3033 i++, k++; 3034 } 3035 break; 3036 default: 3037 break; 3038 } 3039 for (; i < nb_cargs; i++, k++) { 3040 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3041 op->args[k]); 3042 } 3043 } 3044 3045 if (have_prefs || op->life) { 3046 for (; col < 40; ++col) { 3047 putc(' ', f); 3048 } 3049 } 3050 3051 if (op->life) { 3052 unsigned life = op->life; 3053 3054 if (life & (SYNC_ARG * 3)) { 3055 ne_fprintf(f, " sync:"); 3056 for (i = 0; i < 2; ++i) { 3057 if (life & (SYNC_ARG << i)) { 3058 ne_fprintf(f, " %d", i); 3059 } 3060 } 3061 } 3062 life /= DEAD_ARG; 3063 if (life) { 3064 ne_fprintf(f, " dead:"); 3065 for (i = 0; life; ++i, life >>= 1) { 3066 if (life & 1) { 3067 ne_fprintf(f, " %d", i); 3068 } 3069 } 3070 } 3071 } 3072 3073 if (have_prefs) { 3074 for (i = 0; i < nb_oargs; ++i) { 3075 TCGRegSet set = output_pref(op, i); 3076 3077 if (i == 0) { 3078 ne_fprintf(f, " pref="); 3079 } else { 3080 ne_fprintf(f, ","); 3081 } 3082 if (set == 0) { 3083 ne_fprintf(f, "none"); 3084 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3085 ne_fprintf(f, "all"); 3086 #ifdef CONFIG_DEBUG_TCG 3087 } else if (tcg_regset_single(set)) { 3088 TCGReg reg = tcg_regset_first(set); 3089 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3090 #endif 3091 } else if (TCG_TARGET_NB_REGS <= 32) { 3092 ne_fprintf(f, "0x%x", (uint32_t)set); 3093 } else { 3094 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3095 } 3096 } 3097 } 3098 3099 putc('\n', f); 3100 } 3101 } 3102 3103 /* we give more priority to constraints with less registers */ 3104 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3105 { 3106 int n; 3107 3108 arg_ct += k; 3109 n = ctpop64(arg_ct->regs); 3110 3111 /* 3112 * Sort constraints of a single register first, which includes output 3113 * aliases (which must exactly match the input already allocated). 3114 */ 3115 if (n == 1 || arg_ct->oalias) { 3116 return INT_MAX; 3117 } 3118 3119 /* 3120 * Sort register pairs next, first then second immediately after. 3121 * Arbitrarily sort multiple pairs by the index of the first reg; 3122 * there shouldn't be many pairs. 3123 */ 3124 switch (arg_ct->pair) { 3125 case 1: 3126 case 3: 3127 return (k + 1) * 2; 3128 case 2: 3129 return (arg_ct->pair_index + 1) * 2 - 1; 3130 } 3131 3132 /* Finally, sort by decreasing register count. 
*/ 3133 assert(n > 1); 3134 return -n; 3135 } 3136 3137 /* sort from highest priority to lowest */ 3138 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3139 { 3140 int i, j; 3141 3142 for (i = 0; i < n; i++) { 3143 a[start + i].sort_index = start + i; 3144 } 3145 if (n <= 1) { 3146 return; 3147 } 3148 for (i = 0; i < n - 1; i++) { 3149 for (j = i + 1; j < n; j++) { 3150 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3151 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3152 if (p1 < p2) { 3153 int tmp = a[start + i].sort_index; 3154 a[start + i].sort_index = a[start + j].sort_index; 3155 a[start + j].sort_index = tmp; 3156 } 3157 } 3158 } 3159 } 3160 3161 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3162 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3163 3164 static void process_constraint_sets(void) 3165 { 3166 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3167 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3168 TCGArgConstraint *args_ct = all_cts[c]; 3169 int nb_oargs = tdefs->nb_oargs; 3170 int nb_iargs = tdefs->nb_iargs; 3171 int nb_args = nb_oargs + nb_iargs; 3172 bool saw_alias_pair = false; 3173 3174 for (int i = 0; i < nb_args; i++) { 3175 const char *ct_str = tdefs->args_ct_str[i]; 3176 bool input_p = i >= nb_oargs; 3177 int o; 3178 3179 switch (*ct_str) { 3180 case '0' ... '9': 3181 o = *ct_str - '0'; 3182 tcg_debug_assert(input_p); 3183 tcg_debug_assert(o < nb_oargs); 3184 tcg_debug_assert(args_ct[o].regs != 0); 3185 tcg_debug_assert(!args_ct[o].oalias); 3186 args_ct[i] = args_ct[o]; 3187 /* The output sets oalias. */ 3188 args_ct[o].oalias = 1; 3189 args_ct[o].alias_index = i; 3190 /* The input sets ialias. */ 3191 args_ct[i].ialias = 1; 3192 args_ct[i].alias_index = o; 3193 if (args_ct[i].pair) { 3194 saw_alias_pair = true; 3195 } 3196 tcg_debug_assert(ct_str[1] == '\0'); 3197 continue; 3198 3199 case '&': 3200 tcg_debug_assert(!input_p); 3201 args_ct[i].newreg = true; 3202 ct_str++; 3203 break; 3204 3205 case 'p': /* plus */ 3206 /* Allocate to the register after the previous. */ 3207 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3208 o = i - 1; 3209 tcg_debug_assert(!args_ct[o].pair); 3210 tcg_debug_assert(!args_ct[o].ct); 3211 args_ct[i] = (TCGArgConstraint){ 3212 .pair = 2, 3213 .pair_index = o, 3214 .regs = args_ct[o].regs << 1, 3215 .newreg = args_ct[o].newreg, 3216 }; 3217 args_ct[o].pair = 1; 3218 args_ct[o].pair_index = i; 3219 tcg_debug_assert(ct_str[1] == '\0'); 3220 continue; 3221 3222 case 'm': /* minus */ 3223 /* Allocate to the register before the previous. */ 3224 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3225 o = i - 1; 3226 tcg_debug_assert(!args_ct[o].pair); 3227 tcg_debug_assert(!args_ct[o].ct); 3228 args_ct[i] = (TCGArgConstraint){ 3229 .pair = 1, 3230 .pair_index = o, 3231 .regs = args_ct[o].regs >> 1, 3232 .newreg = args_ct[o].newreg, 3233 }; 3234 args_ct[o].pair = 2; 3235 args_ct[o].pair_index = i; 3236 tcg_debug_assert(ct_str[1] == '\0'); 3237 continue; 3238 } 3239 3240 do { 3241 switch (*ct_str) { 3242 case 'i': 3243 args_ct[i].ct |= TCG_CT_CONST; 3244 break; 3245 #ifdef TCG_REG_ZERO 3246 case 'z': 3247 args_ct[i].ct |= TCG_CT_REG_ZERO; 3248 break; 3249 #endif 3250 3251 /* Include all of the target-specific constraints. 
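       Each backend defines its single-character constraint letters in
       tcg-target-con-str.h as CONST(letter, mask) or REGS(letter, mask)
       entries; the #include below expands them into additional switch cases
       that set args_ct[i].ct or args_ct[i].regs respectively.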
*/ 3252 3253 #undef CONST 3254 #define CONST(CASE, MASK) \ 3255 case CASE: args_ct[i].ct |= MASK; break; 3256 #define REGS(CASE, MASK) \ 3257 case CASE: args_ct[i].regs |= MASK; break; 3258 3259 #include "tcg-target-con-str.h" 3260 3261 #undef REGS 3262 #undef CONST 3263 default: 3264 case '0' ... '9': 3265 case '&': 3266 case 'p': 3267 case 'm': 3268 /* Typo in TCGConstraintSet constraint. */ 3269 g_assert_not_reached(); 3270 } 3271 } while (*++ct_str != '\0'); 3272 } 3273 3274 /* 3275 * Fix up output pairs that are aliased with inputs. 3276 * When we created the alias, we copied pair from the output. 3277 * There are three cases: 3278 * (1a) Pairs of inputs alias pairs of outputs. 3279 * (1b) One input aliases the first of a pair of outputs. 3280 * (2) One input aliases the second of a pair of outputs. 3281 * 3282 * Case 1a is handled by making sure that the pair_index'es are 3283 * properly updated so that they appear the same as a pair of inputs. 3284 * 3285 * Case 1b is handled by setting the pair_index of the input to 3286 * itself, simply so it doesn't point to an unrelated argument. 3287 * Since we don't encounter the "second" during the input allocation 3288 * phase, nothing happens with the second half of the input pair. 3289 * 3290 * Case 2 is handled by setting the second input to pair=3, the 3291 * first output to pair=3, and the pair_index'es to match. 3292 */ 3293 if (saw_alias_pair) { 3294 for (int i = nb_oargs; i < nb_args; i++) { 3295 int o, o2, i2; 3296 3297 /* 3298 * Since [0-9pm] must be alone in the constraint string, 3299 * the only way they can both be set is if the pair comes 3300 * from the output alias. 3301 */ 3302 if (!args_ct[i].ialias) { 3303 continue; 3304 } 3305 switch (args_ct[i].pair) { 3306 case 0: 3307 break; 3308 case 1: 3309 o = args_ct[i].alias_index; 3310 o2 = args_ct[o].pair_index; 3311 tcg_debug_assert(args_ct[o].pair == 1); 3312 tcg_debug_assert(args_ct[o2].pair == 2); 3313 if (args_ct[o2].oalias) { 3314 /* Case 1a */ 3315 i2 = args_ct[o2].alias_index; 3316 tcg_debug_assert(args_ct[i2].pair == 2); 3317 args_ct[i2].pair_index = i; 3318 args_ct[i].pair_index = i2; 3319 } else { 3320 /* Case 1b */ 3321 args_ct[i].pair_index = i; 3322 } 3323 break; 3324 case 2: 3325 o = args_ct[i].alias_index; 3326 o2 = args_ct[o].pair_index; 3327 tcg_debug_assert(args_ct[o].pair == 2); 3328 tcg_debug_assert(args_ct[o2].pair == 1); 3329 if (args_ct[o2].oalias) { 3330 /* Case 1a */ 3331 i2 = args_ct[o2].alias_index; 3332 tcg_debug_assert(args_ct[i2].pair == 1); 3333 args_ct[i2].pair_index = i; 3334 args_ct[i].pair_index = i2; 3335 } else { 3336 /* Case 2 */ 3337 args_ct[i].pair = 3; 3338 args_ct[o2].pair = 3; 3339 args_ct[i].pair_index = o2; 3340 args_ct[o2].pair_index = i; 3341 } 3342 break; 3343 default: 3344 g_assert_not_reached(); 3345 } 3346 } 3347 } 3348 3349 /* sort the constraints (XXX: this is just an heuristic) */ 3350 sort_constraints(args_ct, 0, nb_oargs); 3351 sort_constraints(args_ct, nb_oargs, nb_iargs); 3352 } 3353 } 3354 3355 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3356 { 3357 TCGOpcode opc = op->opc; 3358 TCGType type = TCGOP_TYPE(op); 3359 unsigned flags = TCGOP_FLAGS(op); 3360 const TCGOpDef *def = &tcg_op_defs[opc]; 3361 const TCGOutOp *outop = all_outop[opc]; 3362 TCGConstraintSetIndex con_set; 3363 3364 if (def->flags & TCG_OPF_NOT_PRESENT) { 3365 return empty_cts; 3366 } 3367 3368 if (outop) { 3369 con_set = outop->static_constraint; 3370 if (con_set == C_Dynamic) { 3371 con_set = outop->dynamic_constraint(type, flags); 
3372 } 3373 } else { 3374 con_set = tcg_target_op_def(opc, type, flags); 3375 } 3376 tcg_debug_assert(con_set >= 0); 3377 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3378 3379 /* The constraint arguments must match TCGOpcode arguments. */ 3380 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3381 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3382 3383 return all_cts[con_set]; 3384 } 3385 3386 static void remove_label_use(TCGOp *op, int idx) 3387 { 3388 TCGLabel *label = arg_label(op->args[idx]); 3389 TCGLabelUse *use; 3390 3391 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3392 if (use->op == op) { 3393 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3394 return; 3395 } 3396 } 3397 g_assert_not_reached(); 3398 } 3399 3400 void tcg_op_remove(TCGContext *s, TCGOp *op) 3401 { 3402 switch (op->opc) { 3403 case INDEX_op_br: 3404 remove_label_use(op, 0); 3405 break; 3406 case INDEX_op_brcond_i32: 3407 case INDEX_op_brcond_i64: 3408 remove_label_use(op, 3); 3409 break; 3410 case INDEX_op_brcond2_i32: 3411 remove_label_use(op, 5); 3412 break; 3413 default: 3414 break; 3415 } 3416 3417 QTAILQ_REMOVE(&s->ops, op, link); 3418 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3419 s->nb_ops--; 3420 } 3421 3422 void tcg_remove_ops_after(TCGOp *op) 3423 { 3424 TCGContext *s = tcg_ctx; 3425 3426 while (true) { 3427 TCGOp *last = tcg_last_op(); 3428 if (last == op) { 3429 return; 3430 } 3431 tcg_op_remove(s, last); 3432 } 3433 } 3434 3435 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3436 { 3437 TCGContext *s = tcg_ctx; 3438 TCGOp *op = NULL; 3439 3440 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3441 QTAILQ_FOREACH(op, &s->free_ops, link) { 3442 if (nargs <= op->nargs) { 3443 QTAILQ_REMOVE(&s->free_ops, op, link); 3444 nargs = op->nargs; 3445 goto found; 3446 } 3447 } 3448 } 3449 3450 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3451 nargs = MAX(4, nargs); 3452 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3453 3454 found: 3455 memset(op, 0, offsetof(TCGOp, link)); 3456 op->opc = opc; 3457 op->nargs = nargs; 3458 3459 /* Check for bitfield overflow. 
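       TCGOp stores nargs in a narrow bitfield, so an oversized value would be
       silently truncated by the assignment above; re-reading the field and
       comparing catches that in debug builds.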
*/ 3460 tcg_debug_assert(op->nargs == nargs); 3461 3462 s->nb_ops++; 3463 return op; 3464 } 3465 3466 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3467 { 3468 TCGOp *op = tcg_op_alloc(opc, nargs); 3469 3470 if (tcg_ctx->emit_before_op) { 3471 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3472 } else { 3473 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3474 } 3475 return op; 3476 } 3477 3478 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3479 TCGOpcode opc, TCGType type, unsigned nargs) 3480 { 3481 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3482 3483 TCGOP_TYPE(new_op) = type; 3484 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3485 return new_op; 3486 } 3487 3488 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3489 TCGOpcode opc, TCGType type, unsigned nargs) 3490 { 3491 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3492 3493 TCGOP_TYPE(new_op) = type; 3494 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3495 return new_op; 3496 } 3497 3498 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3499 { 3500 TCGLabelUse *u; 3501 3502 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3503 TCGOp *op = u->op; 3504 switch (op->opc) { 3505 case INDEX_op_br: 3506 op->args[0] = label_arg(to); 3507 break; 3508 case INDEX_op_brcond_i32: 3509 case INDEX_op_brcond_i64: 3510 op->args[3] = label_arg(to); 3511 break; 3512 case INDEX_op_brcond2_i32: 3513 op->args[5] = label_arg(to); 3514 break; 3515 default: 3516 g_assert_not_reached(); 3517 } 3518 } 3519 3520 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3521 } 3522 3523 /* Reachable analysis : remove unreachable code. */ 3524 static void __attribute__((noinline)) 3525 reachable_code_pass(TCGContext *s) 3526 { 3527 TCGOp *op, *op_next, *op_prev; 3528 bool dead = false; 3529 3530 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3531 bool remove = dead; 3532 TCGLabel *label; 3533 3534 switch (op->opc) { 3535 case INDEX_op_set_label: 3536 label = arg_label(op->args[0]); 3537 3538 /* 3539 * Note that the first op in the TB is always a load, 3540 * so there is always something before a label. 3541 */ 3542 op_prev = QTAILQ_PREV(op, link); 3543 3544 /* 3545 * If we find two sequential labels, move all branches to 3546 * reference the second label and remove the first label. 3547 * Do this before branch to next optimization, so that the 3548 * middle label is out of the way. 3549 */ 3550 if (op_prev->opc == INDEX_op_set_label) { 3551 move_label_uses(label, arg_label(op_prev->args[0])); 3552 tcg_op_remove(s, op_prev); 3553 op_prev = QTAILQ_PREV(op, link); 3554 } 3555 3556 /* 3557 * Optimization can fold conditional branches to unconditional. 3558 * If we find a label which is preceded by an unconditional 3559 * branch to next, remove the branch. We couldn't do this when 3560 * processing the branch because any dead code between the branch 3561 * and label had not yet been removed. 3562 */ 3563 if (op_prev->opc == INDEX_op_br && 3564 label == arg_label(op_prev->args[0])) { 3565 tcg_op_remove(s, op_prev); 3566 /* Fall through means insns become live again. */ 3567 dead = false; 3568 } 3569 3570 if (QSIMPLEQ_EMPTY(&label->branches)) { 3571 /* 3572 * While there is an occasional backward branch, virtually 3573 * all branches generated by the translators are forward. 3574 * Which means that generally we will have already removed 3575 * all references to the label that will be, and there is 3576 * little to be gained by iterating. 3577 */ 3578 remove = true; 3579 } else { 3580 /* Once we see a label, insns become live again. 
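           A label that still has branches referring to it may be reached
           from code we have already kept, so the ops following it must not
           be removed even if the fall-through path was dead.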
*/ 3581 dead = false; 3582 remove = false; 3583 } 3584 break; 3585 3586 case INDEX_op_br: 3587 case INDEX_op_exit_tb: 3588 case INDEX_op_goto_ptr: 3589 /* Unconditional branches; everything following is dead. */ 3590 dead = true; 3591 break; 3592 3593 case INDEX_op_call: 3594 /* Notice noreturn helper calls, raising exceptions. */ 3595 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3596 dead = true; 3597 } 3598 break; 3599 3600 case INDEX_op_insn_start: 3601 /* Never remove -- we need to keep these for unwind. */ 3602 remove = false; 3603 break; 3604 3605 default: 3606 break; 3607 } 3608 3609 if (remove) { 3610 tcg_op_remove(s, op); 3611 } 3612 } 3613 } 3614 3615 #define TS_DEAD 1 3616 #define TS_MEM 2 3617 3618 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3619 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3620 3621 /* For liveness_pass_1, the register preferences for a given temp. */ 3622 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3623 { 3624 return ts->state_ptr; 3625 } 3626 3627 /* For liveness_pass_1, reset the preferences for a given temp to the 3628 * maximal regset for its type. 3629 */ 3630 static inline void la_reset_pref(TCGTemp *ts) 3631 { 3632 *la_temp_pref(ts) 3633 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3634 } 3635 3636 /* liveness analysis: end of function: all temps are dead, and globals 3637 should be in memory. */ 3638 static void la_func_end(TCGContext *s, int ng, int nt) 3639 { 3640 int i; 3641 3642 for (i = 0; i < ng; ++i) { 3643 s->temps[i].state = TS_DEAD | TS_MEM; 3644 la_reset_pref(&s->temps[i]); 3645 } 3646 for (i = ng; i < nt; ++i) { 3647 s->temps[i].state = TS_DEAD; 3648 la_reset_pref(&s->temps[i]); 3649 } 3650 } 3651 3652 /* liveness analysis: end of basic block: all temps are dead, globals 3653 and local temps should be in memory. */ 3654 static void la_bb_end(TCGContext *s, int ng, int nt) 3655 { 3656 int i; 3657 3658 for (i = 0; i < nt; ++i) { 3659 TCGTemp *ts = &s->temps[i]; 3660 int state; 3661 3662 switch (ts->kind) { 3663 case TEMP_FIXED: 3664 case TEMP_GLOBAL: 3665 case TEMP_TB: 3666 state = TS_DEAD | TS_MEM; 3667 break; 3668 case TEMP_EBB: 3669 case TEMP_CONST: 3670 state = TS_DEAD; 3671 break; 3672 default: 3673 g_assert_not_reached(); 3674 } 3675 ts->state = state; 3676 la_reset_pref(ts); 3677 } 3678 } 3679 3680 /* liveness analysis: sync globals back to memory. */ 3681 static void la_global_sync(TCGContext *s, int ng) 3682 { 3683 int i; 3684 3685 for (i = 0; i < ng; ++i) { 3686 int state = s->temps[i].state; 3687 s->temps[i].state = state | TS_MEM; 3688 if (state == TS_DEAD) { 3689 /* If the global was previously dead, reset prefs. */ 3690 la_reset_pref(&s->temps[i]); 3691 } 3692 } 3693 } 3694 3695 /* 3696 * liveness analysis: conditional branch: all temps are dead unless 3697 * explicitly live-across-conditional-branch, globals and local temps 3698 * should be synced. 3699 */ 3700 static void la_bb_sync(TCGContext *s, int ng, int nt) 3701 { 3702 la_global_sync(s, ng); 3703 3704 for (int i = ng; i < nt; ++i) { 3705 TCGTemp *ts = &s->temps[i]; 3706 int state; 3707 3708 switch (ts->kind) { 3709 case TEMP_TB: 3710 state = ts->state; 3711 ts->state = state | TS_MEM; 3712 if (state != TS_DEAD) { 3713 continue; 3714 } 3715 break; 3716 case TEMP_EBB: 3717 case TEMP_CONST: 3718 continue; 3719 default: 3720 g_assert_not_reached(); 3721 } 3722 la_reset_pref(&s->temps[i]); 3723 } 3724 } 3725 3726 /* liveness analysis: sync globals back to memory and kill. 
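   Unlike la_global_sync(), which only adds TS_MEM and leaves a live global
   live, this also marks every global TS_DEAD, as needed across helper calls
   that may both read and write the guest globals.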
*/ 3727 static void la_global_kill(TCGContext *s, int ng) 3728 { 3729 int i; 3730 3731 for (i = 0; i < ng; i++) { 3732 s->temps[i].state = TS_DEAD | TS_MEM; 3733 la_reset_pref(&s->temps[i]); 3734 } 3735 } 3736 3737 /* liveness analysis: note live globals crossing calls. */ 3738 static void la_cross_call(TCGContext *s, int nt) 3739 { 3740 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3741 int i; 3742 3743 for (i = 0; i < nt; i++) { 3744 TCGTemp *ts = &s->temps[i]; 3745 if (!(ts->state & TS_DEAD)) { 3746 TCGRegSet *pset = la_temp_pref(ts); 3747 TCGRegSet set = *pset; 3748 3749 set &= mask; 3750 /* If the combination is not possible, restart. */ 3751 if (set == 0) { 3752 set = tcg_target_available_regs[ts->type] & mask; 3753 } 3754 *pset = set; 3755 } 3756 } 3757 } 3758 3759 /* 3760 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3761 * to TEMP_EBB, if possible. 3762 */ 3763 static void __attribute__((noinline)) 3764 liveness_pass_0(TCGContext *s) 3765 { 3766 void * const multiple_ebb = (void *)(uintptr_t)-1; 3767 int nb_temps = s->nb_temps; 3768 TCGOp *op, *ebb; 3769 3770 for (int i = s->nb_globals; i < nb_temps; ++i) { 3771 s->temps[i].state_ptr = NULL; 3772 } 3773 3774 /* 3775 * Represent each EBB by the op at which it begins. In the case of 3776 * the first EBB, this is the first op, otherwise it is a label. 3777 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3778 * within a single EBB, else MULTIPLE_EBB. 3779 */ 3780 ebb = QTAILQ_FIRST(&s->ops); 3781 QTAILQ_FOREACH(op, &s->ops, link) { 3782 const TCGOpDef *def; 3783 int nb_oargs, nb_iargs; 3784 3785 switch (op->opc) { 3786 case INDEX_op_set_label: 3787 ebb = op; 3788 continue; 3789 case INDEX_op_discard: 3790 continue; 3791 case INDEX_op_call: 3792 nb_oargs = TCGOP_CALLO(op); 3793 nb_iargs = TCGOP_CALLI(op); 3794 break; 3795 default: 3796 def = &tcg_op_defs[op->opc]; 3797 nb_oargs = def->nb_oargs; 3798 nb_iargs = def->nb_iargs; 3799 break; 3800 } 3801 3802 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3803 TCGTemp *ts = arg_temp(op->args[i]); 3804 3805 if (ts->kind != TEMP_TB) { 3806 continue; 3807 } 3808 if (ts->state_ptr == NULL) { 3809 ts->state_ptr = ebb; 3810 } else if (ts->state_ptr != ebb) { 3811 ts->state_ptr = multiple_ebb; 3812 } 3813 } 3814 } 3815 3816 /* 3817 * For TEMP_TB that turned out not to be used beyond one EBB, 3818 * reduce the liveness to TEMP_EBB. 3819 */ 3820 for (int i = s->nb_globals; i < nb_temps; ++i) { 3821 TCGTemp *ts = &s->temps[i]; 3822 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3823 ts->kind = TEMP_EBB; 3824 } 3825 } 3826 } 3827 3828 /* Liveness analysis : update the opc_arg_life array to tell if a 3829 given input arguments is dead. Instructions updating dead 3830 temporaries are removed. */ 3831 static void __attribute__((noinline)) 3832 liveness_pass_1(TCGContext *s) 3833 { 3834 int nb_globals = s->nb_globals; 3835 int nb_temps = s->nb_temps; 3836 TCGOp *op, *op_prev; 3837 TCGRegSet *prefs; 3838 int i; 3839 3840 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3841 for (i = 0; i < nb_temps; ++i) { 3842 s->temps[i].state_ptr = prefs + i; 3843 } 3844 3845 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 3846 la_func_end(s, nb_globals, nb_temps); 3847 3848 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3849 int nb_iargs, nb_oargs; 3850 TCGOpcode opc_new, opc_new2; 3851 TCGLifeData arg_life = 0; 3852 TCGTemp *ts; 3853 TCGOpcode opc = op->opc; 3854 const TCGOpDef *def = &tcg_op_defs[opc]; 3855 const TCGArgConstraint *args_ct; 3856 3857 switch (opc) { 3858 case INDEX_op_call: 3859 { 3860 const TCGHelperInfo *info = tcg_call_info(op); 3861 int call_flags = tcg_call_flags(op); 3862 3863 nb_oargs = TCGOP_CALLO(op); 3864 nb_iargs = TCGOP_CALLI(op); 3865 3866 /* pure functions can be removed if their result is unused */ 3867 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3868 for (i = 0; i < nb_oargs; i++) { 3869 ts = arg_temp(op->args[i]); 3870 if (ts->state != TS_DEAD) { 3871 goto do_not_remove_call; 3872 } 3873 } 3874 goto do_remove; 3875 } 3876 do_not_remove_call: 3877 3878 /* Output args are dead. */ 3879 for (i = 0; i < nb_oargs; i++) { 3880 ts = arg_temp(op->args[i]); 3881 if (ts->state & TS_DEAD) { 3882 arg_life |= DEAD_ARG << i; 3883 } 3884 if (ts->state & TS_MEM) { 3885 arg_life |= SYNC_ARG << i; 3886 } 3887 ts->state = TS_DEAD; 3888 la_reset_pref(ts); 3889 } 3890 3891 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3892 memset(op->output_pref, 0, sizeof(op->output_pref)); 3893 3894 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3895 TCG_CALL_NO_READ_GLOBALS))) { 3896 la_global_kill(s, nb_globals); 3897 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3898 la_global_sync(s, nb_globals); 3899 } 3900 3901 /* Record arguments that die in this helper. */ 3902 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3903 ts = arg_temp(op->args[i]); 3904 if (ts->state & TS_DEAD) { 3905 arg_life |= DEAD_ARG << i; 3906 } 3907 } 3908 3909 /* For all live registers, remove call-clobbered prefs. */ 3910 la_cross_call(s, nb_temps); 3911 3912 /* 3913 * Input arguments are live for preceding opcodes. 3914 * 3915 * For those arguments that die, and will be allocated in 3916 * registers, clear the register set for that arg, to be 3917 * filled in below. For args that will be on the stack, 3918 * reset to any available reg. Process arguments in reverse 3919 * order so that if a temp is used more than once, the stack 3920 * reset to max happens before the register reset to 0. 3921 */ 3922 for (i = nb_iargs - 1; i >= 0; i--) { 3923 const TCGCallArgumentLoc *loc = &info->in[i]; 3924 ts = arg_temp(op->args[nb_oargs + i]); 3925 3926 if (ts->state & TS_DEAD) { 3927 switch (loc->kind) { 3928 case TCG_CALL_ARG_NORMAL: 3929 case TCG_CALL_ARG_EXTEND_U: 3930 case TCG_CALL_ARG_EXTEND_S: 3931 if (arg_slot_reg_p(loc->arg_slot)) { 3932 *la_temp_pref(ts) = 0; 3933 break; 3934 } 3935 /* fall through */ 3936 default: 3937 *la_temp_pref(ts) = 3938 tcg_target_available_regs[ts->type]; 3939 break; 3940 } 3941 ts->state &= ~TS_DEAD; 3942 } 3943 } 3944 3945 /* 3946 * For each input argument, add its input register to prefs. 3947 * If a temp is used once, this produces a single set bit; 3948 * if a temp is used multiple times, this produces a set. 
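 *
 * For example (illustrative), a temp passed as both the first and second
 * register argument ends up with tcg_target_call_iarg_regs[0] and [1] in
 * its preference set, so the allocator can place it where at least one of
 * the copies is already wanted.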
3949 */ 3950 for (i = 0; i < nb_iargs; i++) { 3951 const TCGCallArgumentLoc *loc = &info->in[i]; 3952 ts = arg_temp(op->args[nb_oargs + i]); 3953 3954 switch (loc->kind) { 3955 case TCG_CALL_ARG_NORMAL: 3956 case TCG_CALL_ARG_EXTEND_U: 3957 case TCG_CALL_ARG_EXTEND_S: 3958 if (arg_slot_reg_p(loc->arg_slot)) { 3959 tcg_regset_set_reg(*la_temp_pref(ts), 3960 tcg_target_call_iarg_regs[loc->arg_slot]); 3961 } 3962 break; 3963 default: 3964 break; 3965 } 3966 } 3967 } 3968 break; 3969 case INDEX_op_insn_start: 3970 break; 3971 case INDEX_op_discard: 3972 /* mark the temporary as dead */ 3973 ts = arg_temp(op->args[0]); 3974 ts->state = TS_DEAD; 3975 la_reset_pref(ts); 3976 break; 3977 3978 case INDEX_op_add2_i32: 3979 case INDEX_op_add2_i64: 3980 opc_new = INDEX_op_add; 3981 goto do_addsub2; 3982 case INDEX_op_sub2_i32: 3983 case INDEX_op_sub2_i64: 3984 opc_new = INDEX_op_sub; 3985 do_addsub2: 3986 nb_iargs = 4; 3987 nb_oargs = 2; 3988 /* Test if the high part of the operation is dead, but not 3989 the low part. The result can be optimized to a simple 3990 add or sub. This happens often for x86_64 guest when the 3991 cpu mode is set to 32 bit. */ 3992 if (arg_temp(op->args[1])->state == TS_DEAD) { 3993 if (arg_temp(op->args[0])->state == TS_DEAD) { 3994 goto do_remove; 3995 } 3996 /* Replace the opcode and adjust the args in place, 3997 leaving 3 unused args at the end. */ 3998 op->opc = opc = opc_new; 3999 op->args[1] = op->args[2]; 4000 op->args[2] = op->args[4]; 4001 /* Fall through and mark the single-word operation live. */ 4002 nb_iargs = 2; 4003 nb_oargs = 1; 4004 } 4005 goto do_not_remove; 4006 4007 case INDEX_op_muls2: 4008 opc_new = INDEX_op_mul; 4009 opc_new2 = INDEX_op_mulsh; 4010 goto do_mul2; 4011 case INDEX_op_mulu2: 4012 opc_new = INDEX_op_mul; 4013 opc_new2 = INDEX_op_muluh; 4014 do_mul2: 4015 nb_iargs = 2; 4016 nb_oargs = 2; 4017 if (arg_temp(op->args[1])->state == TS_DEAD) { 4018 if (arg_temp(op->args[0])->state == TS_DEAD) { 4019 /* Both parts of the operation are dead. */ 4020 goto do_remove; 4021 } 4022 /* The high part of the operation is dead; generate the low. */ 4023 op->opc = opc = opc_new; 4024 op->args[1] = op->args[2]; 4025 op->args[2] = op->args[3]; 4026 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4027 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4028 /* The low part of the operation is dead; generate the high. */ 4029 op->opc = opc = opc_new2; 4030 op->args[0] = op->args[1]; 4031 op->args[1] = op->args[2]; 4032 op->args[2] = op->args[3]; 4033 } else { 4034 goto do_not_remove; 4035 } 4036 /* Mark the single-word operation live. */ 4037 nb_oargs = 1; 4038 goto do_not_remove; 4039 4040 default: 4041 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4042 nb_iargs = def->nb_iargs; 4043 nb_oargs = def->nb_oargs; 4044 4045 /* Test if the operation can be removed because all 4046 its outputs are dead. We assume that nb_oargs == 0 4047 implies side effects */ 4048 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4049 for (i = 0; i < nb_oargs; i++) { 4050 if (arg_temp(op->args[i])->state != TS_DEAD) { 4051 goto do_not_remove; 4052 } 4053 } 4054 goto do_remove; 4055 } 4056 goto do_not_remove; 4057 4058 do_remove: 4059 tcg_op_remove(s, op); 4060 break; 4061 4062 do_not_remove: 4063 for (i = 0; i < nb_oargs; i++) { 4064 ts = arg_temp(op->args[i]); 4065 4066 /* Remember the preference of the uses that followed. 
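               Because this pass walks the op list in reverse, the pref set
               attached to ts currently reflects the register demands of the
               ops that come after this one; snapshotting it into
               op->output_pref lets the register allocator place the output
               where its later consumers want it.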
*/ 4067 if (i < ARRAY_SIZE(op->output_pref)) { 4068 op->output_pref[i] = *la_temp_pref(ts); 4069 } 4070 4071 /* Output args are dead. */ 4072 if (ts->state & TS_DEAD) { 4073 arg_life |= DEAD_ARG << i; 4074 } 4075 if (ts->state & TS_MEM) { 4076 arg_life |= SYNC_ARG << i; 4077 } 4078 ts->state = TS_DEAD; 4079 la_reset_pref(ts); 4080 } 4081 4082 /* If end of basic block, update. */ 4083 if (def->flags & TCG_OPF_BB_EXIT) { 4084 la_func_end(s, nb_globals, nb_temps); 4085 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4086 la_bb_sync(s, nb_globals, nb_temps); 4087 } else if (def->flags & TCG_OPF_BB_END) { 4088 la_bb_end(s, nb_globals, nb_temps); 4089 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4090 la_global_sync(s, nb_globals); 4091 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4092 la_cross_call(s, nb_temps); 4093 } 4094 } 4095 4096 /* Record arguments that die in this opcode. */ 4097 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4098 ts = arg_temp(op->args[i]); 4099 if (ts->state & TS_DEAD) { 4100 arg_life |= DEAD_ARG << i; 4101 } 4102 } 4103 4104 /* Input arguments are live for preceding opcodes. */ 4105 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4106 ts = arg_temp(op->args[i]); 4107 if (ts->state & TS_DEAD) { 4108 /* For operands that were dead, initially allow 4109 all regs for the type. */ 4110 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4111 ts->state &= ~TS_DEAD; 4112 } 4113 } 4114 4115 /* Incorporate constraints for this operand. */ 4116 switch (opc) { 4117 case INDEX_op_mov: 4118 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4119 have proper constraints. That said, special case 4120 moves to propagate preferences backward. */ 4121 if (IS_DEAD_ARG(1)) { 4122 *la_temp_pref(arg_temp(op->args[0])) 4123 = *la_temp_pref(arg_temp(op->args[1])); 4124 } 4125 break; 4126 4127 default: 4128 args_ct = opcode_args_ct(op); 4129 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4130 const TCGArgConstraint *ct = &args_ct[i]; 4131 TCGRegSet set, *pset; 4132 4133 ts = arg_temp(op->args[i]); 4134 pset = la_temp_pref(ts); 4135 set = *pset; 4136 4137 set &= ct->regs; 4138 if (ct->ialias) { 4139 set &= output_pref(op, ct->alias_index); 4140 } 4141 /* If the combination is not possible, restart. */ 4142 if (set == 0) { 4143 set = ct->regs; 4144 } 4145 *pset = set; 4146 } 4147 break; 4148 } 4149 break; 4150 } 4151 op->life = arg_life; 4152 } 4153 } 4154 4155 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4156 static bool __attribute__((noinline)) 4157 liveness_pass_2(TCGContext *s) 4158 { 4159 int nb_globals = s->nb_globals; 4160 int nb_temps, i; 4161 bool changes = false; 4162 TCGOp *op, *op_next; 4163 4164 /* Create a temporary for each indirect global. */ 4165 for (i = 0; i < nb_globals; ++i) { 4166 TCGTemp *its = &s->temps[i]; 4167 if (its->indirect_reg) { 4168 TCGTemp *dts = tcg_temp_alloc(s); 4169 dts->type = its->type; 4170 dts->base_type = its->base_type; 4171 dts->temp_subindex = its->temp_subindex; 4172 dts->kind = TEMP_EBB; 4173 its->state_ptr = dts; 4174 } else { 4175 its->state_ptr = NULL; 4176 } 4177 /* All globals begin dead. 
*/ 4178 its->state = TS_DEAD; 4179 } 4180 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4181 TCGTemp *its = &s->temps[i]; 4182 its->state_ptr = NULL; 4183 its->state = TS_DEAD; 4184 } 4185 4186 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4187 TCGOpcode opc = op->opc; 4188 const TCGOpDef *def = &tcg_op_defs[opc]; 4189 TCGLifeData arg_life = op->life; 4190 int nb_iargs, nb_oargs, call_flags; 4191 TCGTemp *arg_ts, *dir_ts; 4192 4193 if (opc == INDEX_op_call) { 4194 nb_oargs = TCGOP_CALLO(op); 4195 nb_iargs = TCGOP_CALLI(op); 4196 call_flags = tcg_call_flags(op); 4197 } else { 4198 nb_iargs = def->nb_iargs; 4199 nb_oargs = def->nb_oargs; 4200 4201 /* Set flags similar to how calls require. */ 4202 if (def->flags & TCG_OPF_COND_BRANCH) { 4203 /* Like reading globals: sync_globals */ 4204 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4205 } else if (def->flags & TCG_OPF_BB_END) { 4206 /* Like writing globals: save_globals */ 4207 call_flags = 0; 4208 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4209 /* Like reading globals: sync_globals */ 4210 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4211 } else { 4212 /* No effect on globals. */ 4213 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4214 TCG_CALL_NO_WRITE_GLOBALS); 4215 } 4216 } 4217 4218 /* Make sure that input arguments are available. */ 4219 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4220 arg_ts = arg_temp(op->args[i]); 4221 dir_ts = arg_ts->state_ptr; 4222 if (dir_ts && arg_ts->state == TS_DEAD) { 4223 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4224 ? INDEX_op_ld_i32 4225 : INDEX_op_ld_i64); 4226 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4227 arg_ts->type, 3); 4228 4229 lop->args[0] = temp_arg(dir_ts); 4230 lop->args[1] = temp_arg(arg_ts->mem_base); 4231 lop->args[2] = arg_ts->mem_offset; 4232 4233 /* Loaded, but synced with memory. */ 4234 arg_ts->state = TS_MEM; 4235 } 4236 } 4237 4238 /* Perform input replacement, and mark inputs that became dead. 4239 No action is required except keeping temp_state up to date 4240 so that we reload when needed. */ 4241 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4242 arg_ts = arg_temp(op->args[i]); 4243 dir_ts = arg_ts->state_ptr; 4244 if (dir_ts) { 4245 op->args[i] = temp_arg(dir_ts); 4246 changes = true; 4247 if (IS_DEAD_ARG(i)) { 4248 arg_ts->state = TS_DEAD; 4249 } 4250 } 4251 } 4252 4253 /* Liveness analysis should ensure that the following are 4254 all correct, for call sites and basic block end points. */ 4255 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4256 /* Nothing to do */ 4257 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4258 for (i = 0; i < nb_globals; ++i) { 4259 /* Liveness should see that globals are synced back, 4260 that is, either TS_DEAD or TS_MEM. */ 4261 arg_ts = &s->temps[i]; 4262 tcg_debug_assert(arg_ts->state_ptr == 0 4263 || arg_ts->state != 0); 4264 } 4265 } else { 4266 for (i = 0; i < nb_globals; ++i) { 4267 /* Liveness should see that globals are saved back, 4268 that is, TS_DEAD, waiting to be reloaded. */ 4269 arg_ts = &s->temps[i]; 4270 tcg_debug_assert(arg_ts->state_ptr == 0 4271 || arg_ts->state == TS_DEAD); 4272 } 4273 } 4274 4275 /* Outputs become available. */ 4276 if (opc == INDEX_op_mov) { 4277 arg_ts = arg_temp(op->args[0]); 4278 dir_ts = arg_ts->state_ptr; 4279 if (dir_ts) { 4280 op->args[0] = temp_arg(dir_ts); 4281 changes = true; 4282 4283 /* The output is now live and modified. */ 4284 arg_ts->state = 0; 4285 4286 if (NEED_SYNC_ARG(0)) { 4287 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4288 ? 
INDEX_op_st_i32 4289 : INDEX_op_st_i64); 4290 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4291 arg_ts->type, 3); 4292 TCGTemp *out_ts = dir_ts; 4293 4294 if (IS_DEAD_ARG(0)) { 4295 out_ts = arg_temp(op->args[1]); 4296 arg_ts->state = TS_DEAD; 4297 tcg_op_remove(s, op); 4298 } else { 4299 arg_ts->state = TS_MEM; 4300 } 4301 4302 sop->args[0] = temp_arg(out_ts); 4303 sop->args[1] = temp_arg(arg_ts->mem_base); 4304 sop->args[2] = arg_ts->mem_offset; 4305 } else { 4306 tcg_debug_assert(!IS_DEAD_ARG(0)); 4307 } 4308 } 4309 } else { 4310 for (i = 0; i < nb_oargs; i++) { 4311 arg_ts = arg_temp(op->args[i]); 4312 dir_ts = arg_ts->state_ptr; 4313 if (!dir_ts) { 4314 continue; 4315 } 4316 op->args[i] = temp_arg(dir_ts); 4317 changes = true; 4318 4319 /* The output is now live and modified. */ 4320 arg_ts->state = 0; 4321 4322 /* Sync outputs upon their last write. */ 4323 if (NEED_SYNC_ARG(i)) { 4324 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4325 ? INDEX_op_st_i32 4326 : INDEX_op_st_i64); 4327 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4328 arg_ts->type, 3); 4329 4330 sop->args[0] = temp_arg(dir_ts); 4331 sop->args[1] = temp_arg(arg_ts->mem_base); 4332 sop->args[2] = arg_ts->mem_offset; 4333 4334 arg_ts->state = TS_MEM; 4335 } 4336 /* Drop outputs that are dead. */ 4337 if (IS_DEAD_ARG(i)) { 4338 arg_ts->state = TS_DEAD; 4339 } 4340 } 4341 } 4342 } 4343 4344 return changes; 4345 } 4346 4347 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4348 { 4349 intptr_t off; 4350 int size, align; 4351 4352 /* When allocating an object, look at the full type. */ 4353 size = tcg_type_size(ts->base_type); 4354 switch (ts->base_type) { 4355 case TCG_TYPE_I32: 4356 align = 4; 4357 break; 4358 case TCG_TYPE_I64: 4359 case TCG_TYPE_V64: 4360 align = 8; 4361 break; 4362 case TCG_TYPE_I128: 4363 case TCG_TYPE_V128: 4364 case TCG_TYPE_V256: 4365 /* 4366 * Note that we do not require aligned storage for V256, 4367 * and that we provide alignment for I128 to match V128, 4368 * even if that's above what the host ABI requires. 4369 */ 4370 align = 16; 4371 break; 4372 default: 4373 g_assert_not_reached(); 4374 } 4375 4376 /* 4377 * Assume the stack is sufficiently aligned. 4378 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4379 * and do not require 16 byte vector alignment. This seems slightly 4380 * easier than fully parameterizing the above switch statement. 4381 */ 4382 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4383 off = ROUND_UP(s->current_frame_offset, align); 4384 4385 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4386 if (off + size > s->frame_end) { 4387 tcg_raise_tb_overflow(s); 4388 } 4389 s->current_frame_offset = off + size; 4390 #if defined(__sparc__) 4391 off += TCG_TARGET_STACK_BIAS; 4392 #endif 4393 4394 /* If the object was subdivided, assign memory to all the parts. */ 4395 if (ts->base_type != ts->type) { 4396 int part_size = tcg_type_size(ts->type); 4397 int part_count = size / part_size; 4398 4399 /* 4400 * Each part is allocated sequentially in tcg_temp_new_internal. 4401 * Jump back to the first part by subtracting the current index. 4402 */ 4403 ts -= ts->temp_subindex; 4404 for (int i = 0; i < part_count; ++i) { 4405 ts[i].mem_offset = off + i * part_size; 4406 ts[i].mem_base = s->frame_temp; 4407 ts[i].mem_allocated = 1; 4408 } 4409 } else { 4410 ts->mem_offset = off; 4411 ts->mem_base = s->frame_temp; 4412 ts->mem_allocated = 1; 4413 } 4414 } 4415 4416 /* Assign @reg to @ts, and update reg_to_temp[]. 
*/
4417 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4418 {
4419 if (ts->val_type == TEMP_VAL_REG) {
4420 TCGReg old = ts->reg;
4421 tcg_debug_assert(s->reg_to_temp[old] == ts);
4422 if (old == reg) {
4423 return;
4424 }
4425 s->reg_to_temp[old] = NULL;
4426 }
4427 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4428 s->reg_to_temp[reg] = ts;
4429 ts->val_type = TEMP_VAL_REG;
4430 ts->reg = reg;
4431 }
4432
4433 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4434 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4435 {
4436 tcg_debug_assert(type != TEMP_VAL_REG);
4437 if (ts->val_type == TEMP_VAL_REG) {
4438 TCGReg reg = ts->reg;
4439 tcg_debug_assert(s->reg_to_temp[reg] == ts);
4440 s->reg_to_temp[reg] = NULL;
4441 }
4442 ts->val_type = type;
4443 }
4444
4445 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4446
4447 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
4448 mark it free; otherwise mark it dead. */
4449 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4450 {
4451 TCGTempVal new_type;
4452
4453 switch (ts->kind) {
4454 case TEMP_FIXED:
4455 return;
4456 case TEMP_GLOBAL:
4457 case TEMP_TB:
4458 new_type = TEMP_VAL_MEM;
4459 break;
4460 case TEMP_EBB:
4461 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4462 break;
4463 case TEMP_CONST:
4464 new_type = TEMP_VAL_CONST;
4465 break;
4466 default:
4467 g_assert_not_reached();
4468 }
4469 set_temp_val_nonreg(s, ts, new_type);
4470 }
4471
4472 /* Mark a temporary as dead. */
4473 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4474 {
4475 temp_free_or_dead(s, ts, 1);
4476 }
4477
4478 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4479 register needs to be allocated to store a constant. If 'free_or_dead'
4480 is non-zero, subsequently release the temporary; if it is positive, the
4481 temp is dead; if it is negative, the temp is free. */
4482 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4483 TCGRegSet preferred_regs, int free_or_dead)
4484 {
4485 if (!temp_readonly(ts) && !ts->mem_coherent) {
4486 if (!ts->mem_allocated) {
4487 temp_allocate_frame(s, ts);
4488 }
4489 switch (ts->val_type) {
4490 case TEMP_VAL_CONST:
4491 /* If we're going to free the temp immediately, then we won't
4492 require it later in a register, so attempt to store the
4493 constant to memory directly. */
4494 if (free_or_dead
4495 && tcg_out_sti(s, ts->type, ts->val,
4496 ts->mem_base->reg, ts->mem_offset)) {
4497 break;
4498 }
4499 temp_load(s, ts, tcg_target_available_regs[ts->type],
4500 allocated_regs, preferred_regs);
4501 /* fallthrough */
4502
4503 case TEMP_VAL_REG:
4504 tcg_out_st(s, ts->type, ts->reg,
4505 ts->mem_base->reg, ts->mem_offset);
4506 break;
4507
4508 case TEMP_VAL_MEM:
4509 break;
4510
4511 case TEMP_VAL_DEAD:
4512 default:
4513 g_assert_not_reached();
4514 }
4515 ts->mem_coherent = 1;
4516 }
4517 if (free_or_dead) {
4518 temp_free_or_dead(s, ts, free_or_dead);
4519 }
4520 }
4521
4522 /* free register 'reg' by spilling the corresponding temporary if necessary */
4523 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4524 {
4525 TCGTemp *ts = s->reg_to_temp[reg];
4526 if (ts != NULL) {
4527 temp_sync(s, ts, allocated_regs, 0, -1);
4528 }
4529 }
4530
4531 /**
4532 * tcg_reg_alloc:
4533 * @required_regs: Set of registers in which we must allocate.
4534 * @allocated_regs: Set of registers which must be avoided. 4535 * @preferred_regs: Set of registers we should prefer. 4536 * @rev: True if we search the registers in "indirect" order. 4537 * 4538 * The allocated register must be in @required_regs & ~@allocated_regs, 4539 * but if we can put it in @preferred_regs we may save a move later. 4540 */ 4541 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4542 TCGRegSet allocated_regs, 4543 TCGRegSet preferred_regs, bool rev) 4544 { 4545 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4546 TCGRegSet reg_ct[2]; 4547 const int *order; 4548 4549 reg_ct[1] = required_regs & ~allocated_regs; 4550 tcg_debug_assert(reg_ct[1] != 0); 4551 reg_ct[0] = reg_ct[1] & preferred_regs; 4552 4553 /* Skip the preferred_regs option if it cannot be satisfied, 4554 or if the preference made no difference. */ 4555 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4556 4557 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4558 4559 /* Try free registers, preferences first. */ 4560 for (j = f; j < 2; j++) { 4561 TCGRegSet set = reg_ct[j]; 4562 4563 if (tcg_regset_single(set)) { 4564 /* One register in the set. */ 4565 TCGReg reg = tcg_regset_first(set); 4566 if (s->reg_to_temp[reg] == NULL) { 4567 return reg; 4568 } 4569 } else { 4570 for (i = 0; i < n; i++) { 4571 TCGReg reg = order[i]; 4572 if (s->reg_to_temp[reg] == NULL && 4573 tcg_regset_test_reg(set, reg)) { 4574 return reg; 4575 } 4576 } 4577 } 4578 } 4579 4580 /* We must spill something. */ 4581 for (j = f; j < 2; j++) { 4582 TCGRegSet set = reg_ct[j]; 4583 4584 if (tcg_regset_single(set)) { 4585 /* One register in the set. */ 4586 TCGReg reg = tcg_regset_first(set); 4587 tcg_reg_free(s, reg, allocated_regs); 4588 return reg; 4589 } else { 4590 for (i = 0; i < n; i++) { 4591 TCGReg reg = order[i]; 4592 if (tcg_regset_test_reg(set, reg)) { 4593 tcg_reg_free(s, reg, allocated_regs); 4594 return reg; 4595 } 4596 } 4597 } 4598 } 4599 4600 g_assert_not_reached(); 4601 } 4602 4603 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4604 TCGRegSet allocated_regs, 4605 TCGRegSet preferred_regs, bool rev) 4606 { 4607 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4608 TCGRegSet reg_ct[2]; 4609 const int *order; 4610 4611 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4612 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4613 tcg_debug_assert(reg_ct[1] != 0); 4614 reg_ct[0] = reg_ct[1] & preferred_regs; 4615 4616 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4617 4618 /* 4619 * Skip the preferred_regs option if it cannot be satisfied, 4620 * or if the preference made no difference. 4621 */ 4622 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4623 4624 /* 4625 * Minimize the number of flushes by looking for 2 free registers first, 4626 * then a single flush, then two flushes. 4627 */ 4628 for (fmin = 2; fmin >= 0; fmin--) { 4629 for (j = k; j < 2; j++) { 4630 TCGRegSet set = reg_ct[j]; 4631 4632 for (i = 0; i < n; i++) { 4633 TCGReg reg = order[i]; 4634 4635 if (tcg_regset_test_reg(set, reg)) { 4636 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4637 if (f >= fmin) { 4638 tcg_reg_free(s, reg, allocated_regs); 4639 tcg_reg_free(s, reg + 1, allocated_regs); 4640 return reg; 4641 } 4642 } 4643 } 4644 } 4645 } 4646 g_assert_not_reached(); 4647 } 4648 4649 /* Make sure the temporary is in a register. 
If needed, allocate the register
4650 from DESIRED while avoiding ALLOCATED. */
4651 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4652 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4653 {
4654 TCGReg reg;
4655
4656 switch (ts->val_type) {
4657 case TEMP_VAL_REG:
4658 return;
4659 case TEMP_VAL_CONST:
4660 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4661 preferred_regs, ts->indirect_base);
4662 if (ts->type <= TCG_TYPE_I64) {
4663 tcg_out_movi(s, ts->type, reg, ts->val);
4664 } else {
4665 uint64_t val = ts->val;
4666 MemOp vece = MO_64;
4667
4668 /*
4669 * Find the minimal vector element that matches the constant.
4670 * The targets will, in general, have to do this search anyway,
4671 * so do this generically.
4672 */
4673 if (val == dup_const(MO_8, val)) {
4674 vece = MO_8;
4675 } else if (val == dup_const(MO_16, val)) {
4676 vece = MO_16;
4677 } else if (val == dup_const(MO_32, val)) {
4678 vece = MO_32;
4679 }
4680
4681 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4682 }
4683 ts->mem_coherent = 0;
4684 break;
4685 case TEMP_VAL_MEM:
4686 if (!ts->mem_allocated) {
4687 temp_allocate_frame(s, ts);
4688 }
4689 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4690 preferred_regs, ts->indirect_base);
4691 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4692 ts->mem_coherent = 1;
4693 break;
4694 case TEMP_VAL_DEAD:
4695 default:
4696 g_assert_not_reached();
4697 }
4698 set_temp_val_reg(s, ts, reg);
4699 }
4700
4701 /* Save a temporary to memory. 'allocated_regs' is used in case a
4702 temporary register needs to be allocated to store a constant. */
4703 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4704 {
4705 /* The liveness analysis already ensures that globals are back
4706 in memory. Keep a tcg_debug_assert for safety. */
4707 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4708 }
4709
4710 /* save globals to their canonical location and assume they can be
4711 modified by the following code. 'allocated_regs' is used in case a
4712 temporary register needs to be allocated to store a constant. */
4713 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4714 {
4715 int i, n;
4716
4717 for (i = 0, n = s->nb_globals; i < n; i++) {
4718 temp_save(s, &s->temps[i], allocated_regs);
4719 }
4720 }
4721
4722 /* sync globals to their canonical location and assume they can be
4723 read by the following code. 'allocated_regs' is used in case a
4724 temporary register needs to be allocated to store a constant. */
4725 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4726 {
4727 int i, n;
4728
4729 for (i = 0, n = s->nb_globals; i < n; i++) {
4730 TCGTemp *ts = &s->temps[i];
4731 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4732 || ts->kind == TEMP_FIXED
4733 || ts->mem_coherent);
4734 }
4735 }
4736
4737 /* at the end of a basic block, we assume all temporaries are dead and
4738 all globals are stored at their canonical location. */
4739 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4740 {
4741 int i;
4742
4743 for (i = s->nb_globals; i < s->nb_temps; i++) {
4744 TCGTemp *ts = &s->temps[i];
4745
4746 switch (ts->kind) {
4747 case TEMP_TB:
4748 temp_save(s, ts, allocated_regs);
4749 break;
4750 case TEMP_EBB:
4751 /* The liveness analysis already ensures that temps are dead.
4752 Keep a tcg_debug_assert for safety.
*/ 4753 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4754 break; 4755 case TEMP_CONST: 4756 /* Similarly, we should have freed any allocated register. */ 4757 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4758 break; 4759 default: 4760 g_assert_not_reached(); 4761 } 4762 } 4763 4764 save_globals(s, allocated_regs); 4765 } 4766 4767 /* 4768 * At a conditional branch, we assume all temporaries are dead unless 4769 * explicitly live-across-conditional-branch; all globals and local 4770 * temps are synced to their location. 4771 */ 4772 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4773 { 4774 sync_globals(s, allocated_regs); 4775 4776 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4777 TCGTemp *ts = &s->temps[i]; 4778 /* 4779 * The liveness analysis already ensures that temps are dead. 4780 * Keep tcg_debug_asserts for safety. 4781 */ 4782 switch (ts->kind) { 4783 case TEMP_TB: 4784 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4785 break; 4786 case TEMP_EBB: 4787 case TEMP_CONST: 4788 break; 4789 default: 4790 g_assert_not_reached(); 4791 } 4792 } 4793 } 4794 4795 /* 4796 * Specialized code generation for INDEX_op_mov_* with a constant. 4797 */ 4798 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4799 tcg_target_ulong val, TCGLifeData arg_life, 4800 TCGRegSet preferred_regs) 4801 { 4802 /* ENV should not be modified. */ 4803 tcg_debug_assert(!temp_readonly(ots)); 4804 4805 /* The movi is not explicitly generated here. */ 4806 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4807 ots->val = val; 4808 ots->mem_coherent = 0; 4809 if (NEED_SYNC_ARG(0)) { 4810 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4811 } else if (IS_DEAD_ARG(0)) { 4812 temp_dead(s, ots); 4813 } 4814 } 4815 4816 /* 4817 * Specialized code generation for INDEX_op_mov_*. 4818 */ 4819 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4820 { 4821 const TCGLifeData arg_life = op->life; 4822 TCGRegSet allocated_regs, preferred_regs; 4823 TCGTemp *ts, *ots; 4824 TCGType otype, itype; 4825 TCGReg oreg, ireg; 4826 4827 allocated_regs = s->reserved_regs; 4828 preferred_regs = output_pref(op, 0); 4829 ots = arg_temp(op->args[0]); 4830 ts = arg_temp(op->args[1]); 4831 4832 /* ENV should not be modified. */ 4833 tcg_debug_assert(!temp_readonly(ots)); 4834 4835 /* Note that otype != itype for no-op truncation. */ 4836 otype = ots->type; 4837 itype = ts->type; 4838 4839 if (ts->val_type == TEMP_VAL_CONST) { 4840 /* propagate constant or generate sti */ 4841 tcg_target_ulong val = ts->val; 4842 if (IS_DEAD_ARG(1)) { 4843 temp_dead(s, ts); 4844 } 4845 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4846 return; 4847 } 4848 4849 /* If the source value is in memory we're going to be forced 4850 to have it in a register in order to perform the copy. Copy 4851 the SOURCE value into its own register first, that way we 4852 don't have to reload SOURCE the next time it is used. */ 4853 if (ts->val_type == TEMP_VAL_MEM) { 4854 temp_load(s, ts, tcg_target_available_regs[itype], 4855 allocated_regs, preferred_regs); 4856 } 4857 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4858 ireg = ts->reg; 4859 4860 if (IS_DEAD_ARG(0)) { 4861 /* mov to a non-saved dead register makes no sense (even with 4862 liveness analysis disabled). 
*/
4863 tcg_debug_assert(NEED_SYNC_ARG(0));
4864 if (!ots->mem_allocated) {
4865 temp_allocate_frame(s, ots);
4866 }
4867 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4868 if (IS_DEAD_ARG(1)) {
4869 temp_dead(s, ts);
4870 }
4871 temp_dead(s, ots);
4872 return;
4873 }
4874
4875 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4876 /*
4877 * The mov can be suppressed. Kill input first, so that it
4878 * is unlinked from reg_to_temp, then set the output to the
4879 * reg that we saved from the input.
4880 */
4881 temp_dead(s, ts);
4882 oreg = ireg;
4883 } else {
4884 if (ots->val_type == TEMP_VAL_REG) {
4885 oreg = ots->reg;
4886 } else {
4887 /* Make sure to not spill the input register during allocation. */
4888 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4889 allocated_regs | ((TCGRegSet)1 << ireg),
4890 preferred_regs, ots->indirect_base);
4891 }
4892 if (!tcg_out_mov(s, otype, oreg, ireg)) {
4893 /*
4894 * Cross register class move not supported.
4895 * Store the source register into the destination slot
4896 * and leave the destination temp as TEMP_VAL_MEM.
4897 */
4898 assert(!temp_readonly(ots));
4899 if (!ots->mem_allocated) {
4900 temp_allocate_frame(s, ots);
4901 }
4902 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4903 set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4904 ots->mem_coherent = 1;
4905 return;
4906 }
4907 }
4908 set_temp_val_reg(s, ots, oreg);
4909 ots->mem_coherent = 0;
4910
4911 if (NEED_SYNC_ARG(0)) {
4912 temp_sync(s, ots, allocated_regs, 0, 0);
4913 }
4914 }
4915
4916 /*
4917 * Specialized code generation for INDEX_op_dup_vec.
4918 */
4919 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4920 {
4921 const TCGLifeData arg_life = op->life;
4922 TCGRegSet dup_out_regs, dup_in_regs;
4923 const TCGArgConstraint *dup_args_ct;
4924 TCGTemp *its, *ots;
4925 TCGType itype, vtype;
4926 unsigned vece;
4927 int lowpart_ofs;
4928 bool ok;
4929
4930 ots = arg_temp(op->args[0]);
4931 its = arg_temp(op->args[1]);
4932
4933 /* ENV should not be modified. */
4934 tcg_debug_assert(!temp_readonly(ots));
4935
4936 itype = its->type;
4937 vece = TCGOP_VECE(op);
4938 vtype = TCGOP_TYPE(op);
4939
4940 if (its->val_type == TEMP_VAL_CONST) {
4941 /* Propagate constant via movi -> dupi. */
4942 tcg_target_ulong val = its->val;
4943 if (IS_DEAD_ARG(1)) {
4944 temp_dead(s, its);
4945 }
4946 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4947 return;
4948 }
4949
4950 dup_args_ct = opcode_args_ct(op);
4951 dup_out_regs = dup_args_ct[0].regs;
4952 dup_in_regs = dup_args_ct[1].regs;
4953
4954 /* Allocate the output register now. */
4955 if (ots->val_type != TEMP_VAL_REG) {
4956 TCGRegSet allocated_regs = s->reserved_regs;
4957 TCGReg oreg;
4958
4959 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4960 /* Make sure to not spill the input register. */
4961 tcg_regset_set_reg(allocated_regs, its->reg);
4962 }
4963 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4964 output_pref(op, 0), ots->indirect_base);
4965 set_temp_val_reg(s, ots, oreg);
4966 }
4967
4968 switch (its->val_type) {
4969 case TEMP_VAL_REG:
4970 /*
4971 * The dup constraints must be broad, covering all possible VECE.
4972 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4973 * to fail, indicating that extra moves are required for that case.
4974 */ 4975 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4976 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4977 goto done; 4978 } 4979 /* Try again from memory or a vector input register. */ 4980 } 4981 if (!its->mem_coherent) { 4982 /* 4983 * The input register is not synced, and so an extra store 4984 * would be required to use memory. Attempt an integer-vector 4985 * register move first. We do not have a TCGRegSet for this. 4986 */ 4987 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4988 break; 4989 } 4990 /* Sync the temp back to its slot and load from there. */ 4991 temp_sync(s, its, s->reserved_regs, 0, 0); 4992 } 4993 /* fall through */ 4994 4995 case TEMP_VAL_MEM: 4996 lowpart_ofs = 0; 4997 if (HOST_BIG_ENDIAN) { 4998 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4999 } 5000 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5001 its->mem_offset + lowpart_ofs)) { 5002 goto done; 5003 } 5004 /* Load the input into the destination vector register. */ 5005 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5006 break; 5007 5008 default: 5009 g_assert_not_reached(); 5010 } 5011 5012 /* We now have a vector input register, so dup must succeed. */ 5013 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5014 tcg_debug_assert(ok); 5015 5016 done: 5017 ots->mem_coherent = 0; 5018 if (IS_DEAD_ARG(1)) { 5019 temp_dead(s, its); 5020 } 5021 if (NEED_SYNC_ARG(0)) { 5022 temp_sync(s, ots, s->reserved_regs, 0, 0); 5023 } 5024 if (IS_DEAD_ARG(0)) { 5025 temp_dead(s, ots); 5026 } 5027 } 5028 5029 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5030 { 5031 const TCGLifeData arg_life = op->life; 5032 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5033 TCGRegSet i_allocated_regs; 5034 TCGRegSet o_allocated_regs; 5035 int i, k, nb_iargs, nb_oargs; 5036 TCGReg reg; 5037 TCGArg arg; 5038 const TCGArgConstraint *args_ct; 5039 const TCGArgConstraint *arg_ct; 5040 TCGTemp *ts; 5041 TCGArg new_args[TCG_MAX_OP_ARGS]; 5042 int const_args[TCG_MAX_OP_ARGS]; 5043 TCGCond op_cond; 5044 5045 nb_oargs = def->nb_oargs; 5046 nb_iargs = def->nb_iargs; 5047 5048 /* copy constants */ 5049 memcpy(new_args + nb_oargs + nb_iargs, 5050 op->args + nb_oargs + nb_iargs, 5051 sizeof(TCGArg) * def->nb_cargs); 5052 5053 i_allocated_regs = s->reserved_regs; 5054 o_allocated_regs = s->reserved_regs; 5055 5056 switch (op->opc) { 5057 case INDEX_op_brcond_i32: 5058 case INDEX_op_brcond_i64: 5059 op_cond = op->args[2]; 5060 break; 5061 case INDEX_op_setcond_i32: 5062 case INDEX_op_setcond_i64: 5063 case INDEX_op_negsetcond_i32: 5064 case INDEX_op_negsetcond_i64: 5065 case INDEX_op_cmp_vec: 5066 op_cond = op->args[3]; 5067 break; 5068 case INDEX_op_brcond2_i32: 5069 op_cond = op->args[4]; 5070 break; 5071 case INDEX_op_movcond_i32: 5072 case INDEX_op_movcond_i64: 5073 case INDEX_op_setcond2_i32: 5074 case INDEX_op_cmpsel_vec: 5075 op_cond = op->args[5]; 5076 break; 5077 default: 5078 /* No condition within opcode. 
*/ 5079 op_cond = TCG_COND_ALWAYS; 5080 break; 5081 } 5082 5083 args_ct = opcode_args_ct(op); 5084 5085 /* satisfy input constraints */ 5086 for (k = 0; k < nb_iargs; k++) { 5087 TCGRegSet i_preferred_regs, i_required_regs; 5088 bool allocate_new_reg, copyto_new_reg; 5089 TCGTemp *ts2; 5090 int i1, i2; 5091 5092 i = args_ct[nb_oargs + k].sort_index; 5093 arg = op->args[i]; 5094 arg_ct = &args_ct[i]; 5095 ts = arg_temp(arg); 5096 5097 if (ts->val_type == TEMP_VAL_CONST) { 5098 #ifdef TCG_REG_ZERO 5099 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5100 /* Hardware zero register: indicate register via non-const. */ 5101 const_args[i] = 0; 5102 new_args[i] = TCG_REG_ZERO; 5103 continue; 5104 } 5105 #endif 5106 5107 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5108 op_cond, TCGOP_VECE(op))) { 5109 /* constant is OK for instruction */ 5110 const_args[i] = 1; 5111 new_args[i] = ts->val; 5112 continue; 5113 } 5114 } 5115 5116 reg = ts->reg; 5117 i_preferred_regs = 0; 5118 i_required_regs = arg_ct->regs; 5119 allocate_new_reg = false; 5120 copyto_new_reg = false; 5121 5122 switch (arg_ct->pair) { 5123 case 0: /* not paired */ 5124 if (arg_ct->ialias) { 5125 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5126 5127 /* 5128 * If the input is readonly, then it cannot also be an 5129 * output and aliased to itself. If the input is not 5130 * dead after the instruction, we must allocate a new 5131 * register and move it. 5132 */ 5133 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5134 || args_ct[arg_ct->alias_index].newreg) { 5135 allocate_new_reg = true; 5136 } else if (ts->val_type == TEMP_VAL_REG) { 5137 /* 5138 * Check if the current register has already been 5139 * allocated for another input. 5140 */ 5141 allocate_new_reg = 5142 tcg_regset_test_reg(i_allocated_regs, reg); 5143 } 5144 } 5145 if (!allocate_new_reg) { 5146 temp_load(s, ts, i_required_regs, i_allocated_regs, 5147 i_preferred_regs); 5148 reg = ts->reg; 5149 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5150 } 5151 if (allocate_new_reg) { 5152 /* 5153 * Allocate a new register matching the constraint 5154 * and move the temporary register into it. 5155 */ 5156 temp_load(s, ts, tcg_target_available_regs[ts->type], 5157 i_allocated_regs, 0); 5158 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5159 i_preferred_regs, ts->indirect_base); 5160 copyto_new_reg = true; 5161 } 5162 break; 5163 5164 case 1: 5165 /* First of an input pair; if i1 == i2, the second is an output. */ 5166 i1 = i; 5167 i2 = arg_ct->pair_index; 5168 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5169 5170 /* 5171 * It is easier to default to allocating a new pair 5172 * and to identify a few cases where it's not required. 5173 */ 5174 if (arg_ct->ialias) { 5175 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5176 if (IS_DEAD_ARG(i1) && 5177 IS_DEAD_ARG(i2) && 5178 !temp_readonly(ts) && 5179 ts->val_type == TEMP_VAL_REG && 5180 ts->reg < TCG_TARGET_NB_REGS - 1 && 5181 tcg_regset_test_reg(i_required_regs, reg) && 5182 !tcg_regset_test_reg(i_allocated_regs, reg) && 5183 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5184 (ts2 5185 ? ts2->val_type == TEMP_VAL_REG && 5186 ts2->reg == reg + 1 && 5187 !temp_readonly(ts2) 5188 : s->reg_to_temp[reg + 1] == NULL)) { 5189 break; 5190 } 5191 } else { 5192 /* Without aliasing, the pair must also be an input. 
*/ 5193 tcg_debug_assert(ts2); 5194 if (ts->val_type == TEMP_VAL_REG && 5195 ts2->val_type == TEMP_VAL_REG && 5196 ts2->reg == reg + 1 && 5197 tcg_regset_test_reg(i_required_regs, reg)) { 5198 break; 5199 } 5200 } 5201 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5202 0, ts->indirect_base); 5203 goto do_pair; 5204 5205 case 2: /* pair second */ 5206 reg = new_args[arg_ct->pair_index] + 1; 5207 goto do_pair; 5208 5209 case 3: /* ialias with second output, no first input */ 5210 tcg_debug_assert(arg_ct->ialias); 5211 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5212 5213 if (IS_DEAD_ARG(i) && 5214 !temp_readonly(ts) && 5215 ts->val_type == TEMP_VAL_REG && 5216 reg > 0 && 5217 s->reg_to_temp[reg - 1] == NULL && 5218 tcg_regset_test_reg(i_required_regs, reg) && 5219 !tcg_regset_test_reg(i_allocated_regs, reg) && 5220 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5221 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5222 break; 5223 } 5224 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5225 i_allocated_regs, 0, 5226 ts->indirect_base); 5227 tcg_regset_set_reg(i_allocated_regs, reg); 5228 reg += 1; 5229 goto do_pair; 5230 5231 do_pair: 5232 /* 5233 * If an aliased input is not dead after the instruction, 5234 * we must allocate a new register and move it. 5235 */ 5236 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5237 TCGRegSet t_allocated_regs = i_allocated_regs; 5238 5239 /* 5240 * Because of the alias, and the continued life, make sure 5241 * that the temp is somewhere *other* than the reg pair, 5242 * and we get a copy in reg. 5243 */ 5244 tcg_regset_set_reg(t_allocated_regs, reg); 5245 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5246 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5247 /* If ts was already in reg, copy it somewhere else. */ 5248 TCGReg nr; 5249 bool ok; 5250 5251 tcg_debug_assert(ts->kind != TEMP_FIXED); 5252 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5253 t_allocated_regs, 0, ts->indirect_base); 5254 ok = tcg_out_mov(s, ts->type, nr, reg); 5255 tcg_debug_assert(ok); 5256 5257 set_temp_val_reg(s, ts, nr); 5258 } else { 5259 temp_load(s, ts, tcg_target_available_regs[ts->type], 5260 t_allocated_regs, 0); 5261 copyto_new_reg = true; 5262 } 5263 } else { 5264 /* Preferably allocate to reg, otherwise copy. */ 5265 i_required_regs = (TCGRegSet)1 << reg; 5266 temp_load(s, ts, i_required_regs, i_allocated_regs, 5267 i_preferred_regs); 5268 copyto_new_reg = ts->reg != reg; 5269 } 5270 break; 5271 5272 default: 5273 g_assert_not_reached(); 5274 } 5275 5276 if (copyto_new_reg) { 5277 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5278 /* 5279 * Cross register class move not supported. Sync the 5280 * temp back to its slot and load from there. 5281 */ 5282 temp_sync(s, ts, i_allocated_regs, 0, 0); 5283 tcg_out_ld(s, ts->type, reg, 5284 ts->mem_base->reg, ts->mem_offset); 5285 } 5286 } 5287 new_args[i] = reg; 5288 const_args[i] = 0; 5289 tcg_regset_set_reg(i_allocated_regs, reg); 5290 } 5291 5292 /* mark dead temporaries and free the associated registers */ 5293 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5294 if (IS_DEAD_ARG(i)) { 5295 temp_dead(s, arg_temp(op->args[i])); 5296 } 5297 } 5298 5299 if (def->flags & TCG_OPF_COND_BRANCH) { 5300 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5301 } else if (def->flags & TCG_OPF_BB_END) { 5302 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5303 } else { 5304 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5305 /* XXX: permit generic clobber register list ? 
*/ 5306 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5307 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5308 tcg_reg_free(s, i, i_allocated_regs); 5309 } 5310 } 5311 } 5312 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5313 /* sync globals if the op has side effects and might trigger 5314 an exception. */ 5315 sync_globals(s, i_allocated_regs); 5316 } 5317 5318 /* satisfy the output constraints */ 5319 for (k = 0; k < nb_oargs; k++) { 5320 i = args_ct[k].sort_index; 5321 arg = op->args[i]; 5322 arg_ct = &args_ct[i]; 5323 ts = arg_temp(arg); 5324 5325 /* ENV should not be modified. */ 5326 tcg_debug_assert(!temp_readonly(ts)); 5327 5328 switch (arg_ct->pair) { 5329 case 0: /* not paired */ 5330 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5331 reg = new_args[arg_ct->alias_index]; 5332 } else if (arg_ct->newreg) { 5333 reg = tcg_reg_alloc(s, arg_ct->regs, 5334 i_allocated_regs | o_allocated_regs, 5335 output_pref(op, k), ts->indirect_base); 5336 } else { 5337 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5338 output_pref(op, k), ts->indirect_base); 5339 } 5340 break; 5341 5342 case 1: /* first of pair */ 5343 if (arg_ct->oalias) { 5344 reg = new_args[arg_ct->alias_index]; 5345 } else if (arg_ct->newreg) { 5346 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5347 i_allocated_regs | o_allocated_regs, 5348 output_pref(op, k), 5349 ts->indirect_base); 5350 } else { 5351 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5352 output_pref(op, k), 5353 ts->indirect_base); 5354 } 5355 break; 5356 5357 case 2: /* second of pair */ 5358 if (arg_ct->oalias) { 5359 reg = new_args[arg_ct->alias_index]; 5360 } else { 5361 reg = new_args[arg_ct->pair_index] + 1; 5362 } 5363 break; 5364 5365 case 3: /* first of pair, aliasing with a second input */ 5366 tcg_debug_assert(!arg_ct->newreg); 5367 reg = new_args[arg_ct->pair_index] - 1; 5368 break; 5369 5370 default: 5371 g_assert_not_reached(); 5372 } 5373 tcg_regset_set_reg(o_allocated_regs, reg); 5374 set_temp_val_reg(s, ts, reg); 5375 ts->mem_coherent = 0; 5376 new_args[i] = reg; 5377 } 5378 } 5379 5380 /* emit instruction */ 5381 TCGType type = TCGOP_TYPE(op); 5382 switch (op->opc) { 5383 case INDEX_op_ext_i32_i64: 5384 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5385 break; 5386 case INDEX_op_extu_i32_i64: 5387 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5388 break; 5389 case INDEX_op_extrl_i64_i32: 5390 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5391 break; 5392 5393 case INDEX_op_add: 5394 case INDEX_op_and: 5395 case INDEX_op_andc: 5396 case INDEX_op_clz: 5397 case INDEX_op_ctz: 5398 case INDEX_op_divs: 5399 case INDEX_op_divu: 5400 case INDEX_op_eqv: 5401 case INDEX_op_mul: 5402 case INDEX_op_mulsh: 5403 case INDEX_op_muluh: 5404 case INDEX_op_nand: 5405 case INDEX_op_nor: 5406 case INDEX_op_or: 5407 case INDEX_op_orc: 5408 case INDEX_op_rems: 5409 case INDEX_op_remu: 5410 case INDEX_op_rotl: 5411 case INDEX_op_rotr: 5412 case INDEX_op_sar: 5413 case INDEX_op_shl: 5414 case INDEX_op_shr: 5415 case INDEX_op_xor: 5416 { 5417 const TCGOutOpBinary *out = 5418 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5419 5420 /* Constants should never appear in the first source operand. 
*/ 5421 tcg_debug_assert(!const_args[1]); 5422 if (const_args[2]) { 5423 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5424 } else { 5425 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5426 } 5427 } 5428 break; 5429 5430 case INDEX_op_sub: 5431 { 5432 const TCGOutOpSubtract *out = &outop_sub; 5433 5434 /* 5435 * Constants should never appear in the second source operand. 5436 * These are folded to add with negative constant. 5437 */ 5438 tcg_debug_assert(!const_args[2]); 5439 if (const_args[1]) { 5440 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5441 } else { 5442 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5443 } 5444 } 5445 break; 5446 5447 case INDEX_op_ctpop: 5448 case INDEX_op_neg: 5449 case INDEX_op_not: 5450 { 5451 const TCGOutOpUnary *out = 5452 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5453 5454 /* Constants should have been folded. */ 5455 tcg_debug_assert(!const_args[1]); 5456 out->out_rr(s, type, new_args[0], new_args[1]); 5457 } 5458 break; 5459 5460 case INDEX_op_divs2: 5461 case INDEX_op_divu2: 5462 { 5463 const TCGOutOpDivRem *out = 5464 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5465 5466 /* Only used by x86 and s390x, which use matching constraints. */ 5467 tcg_debug_assert(new_args[0] == new_args[2]); 5468 tcg_debug_assert(new_args[1] == new_args[3]); 5469 tcg_debug_assert(!const_args[4]); 5470 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5471 } 5472 break; 5473 5474 case INDEX_op_muls2: 5475 case INDEX_op_mulu2: 5476 { 5477 const TCGOutOpMul2 *out = 5478 container_of(all_outop[op->opc], TCGOutOpMul2, base); 5479 5480 tcg_debug_assert(!const_args[2]); 5481 tcg_debug_assert(!const_args[3]); 5482 out->out_rrrr(s, type, new_args[0], new_args[1], 5483 new_args[2], new_args[3]); 5484 } 5485 break; 5486 5487 5488 default: 5489 if (def->flags & TCG_OPF_VECTOR) { 5490 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5491 TCGOP_VECE(op), new_args, const_args); 5492 } else { 5493 tcg_out_op(s, op->opc, type, new_args, const_args); 5494 } 5495 break; 5496 } 5497 5498 /* move the outputs in the correct register if needed */ 5499 for(i = 0; i < nb_oargs; i++) { 5500 ts = arg_temp(op->args[i]); 5501 5502 /* ENV should not be modified. */ 5503 tcg_debug_assert(!temp_readonly(ts)); 5504 5505 if (NEED_SYNC_ARG(i)) { 5506 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5507 } else if (IS_DEAD_ARG(i)) { 5508 temp_dead(s, ts); 5509 } 5510 } 5511 } 5512 5513 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5514 { 5515 const TCGLifeData arg_life = op->life; 5516 TCGTemp *ots, *itsl, *itsh; 5517 TCGType vtype = TCGOP_TYPE(op); 5518 5519 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5520 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5521 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5522 5523 ots = arg_temp(op->args[0]); 5524 itsl = arg_temp(op->args[1]); 5525 itsh = arg_temp(op->args[2]); 5526 5527 /* ENV should not be modified. */ 5528 tcg_debug_assert(!temp_readonly(ots)); 5529 5530 /* Allocate the output register now. */ 5531 if (ots->val_type != TEMP_VAL_REG) { 5532 TCGRegSet allocated_regs = s->reserved_regs; 5533 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5534 TCGReg oreg; 5535 5536 /* Make sure to not spill the input registers. 
*/ 5537 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5538 tcg_regset_set_reg(allocated_regs, itsl->reg); 5539 } 5540 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5541 tcg_regset_set_reg(allocated_regs, itsh->reg); 5542 } 5543 5544 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5545 output_pref(op, 0), ots->indirect_base); 5546 set_temp_val_reg(s, ots, oreg); 5547 } 5548 5549 /* Promote dup2 of immediates to dupi_vec. */ 5550 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5551 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5552 MemOp vece = MO_64; 5553 5554 if (val == dup_const(MO_8, val)) { 5555 vece = MO_8; 5556 } else if (val == dup_const(MO_16, val)) { 5557 vece = MO_16; 5558 } else if (val == dup_const(MO_32, val)) { 5559 vece = MO_32; 5560 } 5561 5562 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5563 goto done; 5564 } 5565 5566 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5567 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5568 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5569 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5570 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5571 5572 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5573 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5574 5575 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5576 its->mem_base->reg, its->mem_offset)) { 5577 goto done; 5578 } 5579 } 5580 5581 /* Fall back to generic expansion. */ 5582 return false; 5583 5584 done: 5585 ots->mem_coherent = 0; 5586 if (IS_DEAD_ARG(1)) { 5587 temp_dead(s, itsl); 5588 } 5589 if (IS_DEAD_ARG(2)) { 5590 temp_dead(s, itsh); 5591 } 5592 if (NEED_SYNC_ARG(0)) { 5593 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5594 } else if (IS_DEAD_ARG(0)) { 5595 temp_dead(s, ots); 5596 } 5597 return true; 5598 } 5599 5600 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5601 TCGRegSet allocated_regs) 5602 { 5603 if (ts->val_type == TEMP_VAL_REG) { 5604 if (ts->reg != reg) { 5605 tcg_reg_free(s, reg, allocated_regs); 5606 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5607 /* 5608 * Cross register class move not supported. Sync the 5609 * temp back to its slot and load from there. 5610 */ 5611 temp_sync(s, ts, allocated_regs, 0, 0); 5612 tcg_out_ld(s, ts->type, reg, 5613 ts->mem_base->reg, ts->mem_offset); 5614 } 5615 } 5616 } else { 5617 TCGRegSet arg_set = 0; 5618 5619 tcg_reg_free(s, reg, allocated_regs); 5620 tcg_regset_set_reg(arg_set, reg); 5621 temp_load(s, ts, arg_set, allocated_regs, 0); 5622 } 5623 } 5624 5625 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5626 TCGRegSet allocated_regs) 5627 { 5628 /* 5629 * When the destination is on the stack, load up the temp and store. 5630 * If there are many call-saved registers, the temp might live to 5631 * see another use; otherwise it'll be discarded. 
5632 */ 5633 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5634 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5635 arg_slot_stk_ofs(arg_slot)); 5636 } 5637 5638 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5639 TCGTemp *ts, TCGRegSet *allocated_regs) 5640 { 5641 if (arg_slot_reg_p(l->arg_slot)) { 5642 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5643 load_arg_reg(s, reg, ts, *allocated_regs); 5644 tcg_regset_set_reg(*allocated_regs, reg); 5645 } else { 5646 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5647 } 5648 } 5649 5650 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5651 intptr_t ref_off, TCGRegSet *allocated_regs) 5652 { 5653 TCGReg reg; 5654 5655 if (arg_slot_reg_p(arg_slot)) { 5656 reg = tcg_target_call_iarg_regs[arg_slot]; 5657 tcg_reg_free(s, reg, *allocated_regs); 5658 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5659 tcg_regset_set_reg(*allocated_regs, reg); 5660 } else { 5661 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5662 *allocated_regs, 0, false); 5663 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5664 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5665 arg_slot_stk_ofs(arg_slot)); 5666 } 5667 } 5668 5669 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5670 { 5671 const int nb_oargs = TCGOP_CALLO(op); 5672 const int nb_iargs = TCGOP_CALLI(op); 5673 const TCGLifeData arg_life = op->life; 5674 const TCGHelperInfo *info = tcg_call_info(op); 5675 TCGRegSet allocated_regs = s->reserved_regs; 5676 int i; 5677 5678 /* 5679 * Move inputs into place in reverse order, 5680 * so that we place stacked arguments first. 5681 */ 5682 for (i = nb_iargs - 1; i >= 0; --i) { 5683 const TCGCallArgumentLoc *loc = &info->in[i]; 5684 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5685 5686 switch (loc->kind) { 5687 case TCG_CALL_ARG_NORMAL: 5688 case TCG_CALL_ARG_EXTEND_U: 5689 case TCG_CALL_ARG_EXTEND_S: 5690 load_arg_normal(s, loc, ts, &allocated_regs); 5691 break; 5692 case TCG_CALL_ARG_BY_REF: 5693 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5694 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5695 arg_slot_stk_ofs(loc->ref_slot), 5696 &allocated_regs); 5697 break; 5698 case TCG_CALL_ARG_BY_REF_N: 5699 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5700 break; 5701 default: 5702 g_assert_not_reached(); 5703 } 5704 } 5705 5706 /* Mark dead temporaries and free the associated registers. */ 5707 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5708 if (IS_DEAD_ARG(i)) { 5709 temp_dead(s, arg_temp(op->args[i])); 5710 } 5711 } 5712 5713 /* Clobber call registers. */ 5714 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5715 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5716 tcg_reg_free(s, i, allocated_regs); 5717 } 5718 } 5719 5720 /* 5721 * Save globals if they might be written by the helper, 5722 * sync them if they might be read. 5723 */ 5724 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5725 /* Nothing to do */ 5726 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5727 sync_globals(s, allocated_regs); 5728 } else { 5729 save_globals(s, allocated_regs); 5730 } 5731 5732 /* 5733 * If the ABI passes a pointer to the returned struct as the first 5734 * argument, load that now. Pass a pointer to the output home slot. 
5735 */ 5736 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5737 TCGTemp *ts = arg_temp(op->args[0]); 5738 5739 if (!ts->mem_allocated) { 5740 temp_allocate_frame(s, ts); 5741 } 5742 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5743 } 5744 5745 tcg_out_call(s, tcg_call_func(op), info); 5746 5747 /* Assign output registers and emit moves if needed. */ 5748 switch (info->out_kind) { 5749 case TCG_CALL_RET_NORMAL: 5750 for (i = 0; i < nb_oargs; i++) { 5751 TCGTemp *ts = arg_temp(op->args[i]); 5752 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5753 5754 /* ENV should not be modified. */ 5755 tcg_debug_assert(!temp_readonly(ts)); 5756 5757 set_temp_val_reg(s, ts, reg); 5758 ts->mem_coherent = 0; 5759 } 5760 break; 5761 5762 case TCG_CALL_RET_BY_VEC: 5763 { 5764 TCGTemp *ts = arg_temp(op->args[0]); 5765 5766 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5767 tcg_debug_assert(ts->temp_subindex == 0); 5768 if (!ts->mem_allocated) { 5769 temp_allocate_frame(s, ts); 5770 } 5771 tcg_out_st(s, TCG_TYPE_V128, 5772 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5773 ts->mem_base->reg, ts->mem_offset); 5774 } 5775 /* fall through to mark all parts in memory */ 5776 5777 case TCG_CALL_RET_BY_REF: 5778 /* The callee has performed a write through the reference. */ 5779 for (i = 0; i < nb_oargs; i++) { 5780 TCGTemp *ts = arg_temp(op->args[i]); 5781 ts->val_type = TEMP_VAL_MEM; 5782 } 5783 break; 5784 5785 default: 5786 g_assert_not_reached(); 5787 } 5788 5789 /* Flush or discard output registers as needed. */ 5790 for (i = 0; i < nb_oargs; i++) { 5791 TCGTemp *ts = arg_temp(op->args[i]); 5792 if (NEED_SYNC_ARG(i)) { 5793 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5794 } else if (IS_DEAD_ARG(i)) { 5795 temp_dead(s, ts); 5796 } 5797 } 5798 } 5799 5800 /** 5801 * atom_and_align_for_opc: 5802 * @s: tcg context 5803 * @opc: memory operation code 5804 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5805 * @allow_two_ops: true if we are prepared to issue two operations 5806 * 5807 * Return the alignment and atomicity to use for the inline fast path 5808 * for the given memory operation. The alignment may be larger than 5809 * that specified in @opc, and the correct alignment will be diagnosed 5810 * by the slow path helper. 5811 * 5812 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5813 * and issue two loads or stores for subalignment. 5814 */ 5815 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5816 MemOp host_atom, bool allow_two_ops) 5817 { 5818 MemOp align = memop_alignment_bits(opc); 5819 MemOp size = opc & MO_SIZE; 5820 MemOp half = size ? size - 1 : 0; 5821 MemOp atom = opc & MO_ATOM_MASK; 5822 MemOp atmax; 5823 5824 switch (atom) { 5825 case MO_ATOM_NONE: 5826 /* The operation requires no specific atomicity. */ 5827 atmax = MO_8; 5828 break; 5829 5830 case MO_ATOM_IFALIGN: 5831 atmax = size; 5832 break; 5833 5834 case MO_ATOM_IFALIGN_PAIR: 5835 atmax = half; 5836 break; 5837 5838 case MO_ATOM_WITHIN16: 5839 atmax = size; 5840 if (size == MO_128) { 5841 /* Misalignment implies !within16, and therefore no atomicity. */ 5842 } else if (host_atom != MO_ATOM_WITHIN16) { 5843 /* The host does not implement within16, so require alignment. */ 5844 align = MAX(align, size); 5845 } 5846 break; 5847 5848 case MO_ATOM_WITHIN16_PAIR: 5849 atmax = size; 5850 /* 5851 * Misalignment implies !within16, and therefore half atomicity. 
5852 * Any host prepared for two operations can implement this with 5853 * half alignment. 5854 */ 5855 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5856 align = MAX(align, half); 5857 } 5858 break; 5859 5860 case MO_ATOM_SUBALIGN: 5861 atmax = size; 5862 if (host_atom != MO_ATOM_SUBALIGN) { 5863 /* If unaligned but not odd, there are subobjects up to half. */ 5864 if (allow_two_ops) { 5865 align = MAX(align, half); 5866 } else { 5867 align = MAX(align, size); 5868 } 5869 } 5870 break; 5871 5872 default: 5873 g_assert_not_reached(); 5874 } 5875 5876 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5877 } 5878 5879 /* 5880 * Similarly for qemu_ld/st slow path helpers. 5881 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5882 * using only the provided backend tcg_out_* functions. 5883 */ 5884 5885 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5886 { 5887 int ofs = arg_slot_stk_ofs(slot); 5888 5889 /* 5890 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5891 * require extension to uint64_t, adjust the address for uint32_t. 5892 */ 5893 if (HOST_BIG_ENDIAN && 5894 TCG_TARGET_REG_BITS == 64 && 5895 type == TCG_TYPE_I32) { 5896 ofs += 4; 5897 } 5898 return ofs; 5899 } 5900 5901 static void tcg_out_helper_load_slots(TCGContext *s, 5902 unsigned nmov, TCGMovExtend *mov, 5903 const TCGLdstHelperParam *parm) 5904 { 5905 unsigned i; 5906 TCGReg dst3; 5907 5908 /* 5909 * Start from the end, storing to the stack first. 5910 * This frees those registers, so we need not consider overlap. 5911 */ 5912 for (i = nmov; i-- > 0; ) { 5913 unsigned slot = mov[i].dst; 5914 5915 if (arg_slot_reg_p(slot)) { 5916 goto found_reg; 5917 } 5918 5919 TCGReg src = mov[i].src; 5920 TCGType dst_type = mov[i].dst_type; 5921 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5922 5923 /* The argument is going onto the stack; extend into scratch. */ 5924 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5925 tcg_debug_assert(parm->ntmp != 0); 5926 mov[i].dst = src = parm->tmp[0]; 5927 tcg_out_movext1(s, &mov[i]); 5928 } 5929 5930 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5931 tcg_out_helper_stk_ofs(dst_type, slot)); 5932 } 5933 return; 5934 5935 found_reg: 5936 /* 5937 * The remaining arguments are in registers. 5938 * Convert slot numbers to argument registers. 5939 */ 5940 nmov = i + 1; 5941 for (i = 0; i < nmov; ++i) { 5942 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5943 } 5944 5945 switch (nmov) { 5946 case 4: 5947 /* The backend must have provided enough temps for the worst case. */ 5948 tcg_debug_assert(parm->ntmp >= 2); 5949 5950 dst3 = mov[3].dst; 5951 for (unsigned j = 0; j < 3; ++j) { 5952 if (dst3 == mov[j].src) { 5953 /* 5954 * Conflict. Copy the source to a temporary, perform the 5955 * remaining moves, then the extension from our scratch 5956 * on the way out. 5957 */ 5958 TCGReg scratch = parm->tmp[1]; 5959 5960 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5961 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5962 tcg_out_movext1_new_src(s, &mov[3], scratch); 5963 break; 5964 } 5965 } 5966 5967 /* No conflicts: perform this move and continue. */ 5968 tcg_out_movext1(s, &mov[3]); 5969 /* fall through */ 5970 5971 case 3: 5972 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5973 parm->ntmp ? parm->tmp[0] : -1); 5974 break; 5975 case 2: 5976 tcg_out_movext2(s, mov, mov + 1, 5977 parm->ntmp ? 
parm->tmp[0] : -1); 5978 break; 5979 case 1: 5980 tcg_out_movext1(s, mov); 5981 break; 5982 default: 5983 g_assert_not_reached(); 5984 } 5985 } 5986 5987 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5988 TCGType type, tcg_target_long imm, 5989 const TCGLdstHelperParam *parm) 5990 { 5991 if (arg_slot_reg_p(slot)) { 5992 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5993 } else { 5994 int ofs = tcg_out_helper_stk_ofs(type, slot); 5995 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5996 tcg_debug_assert(parm->ntmp != 0); 5997 tcg_out_movi(s, type, parm->tmp[0], imm); 5998 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5999 } 6000 } 6001 } 6002 6003 static void tcg_out_helper_load_common_args(TCGContext *s, 6004 const TCGLabelQemuLdst *ldst, 6005 const TCGLdstHelperParam *parm, 6006 const TCGHelperInfo *info, 6007 unsigned next_arg) 6008 { 6009 TCGMovExtend ptr_mov = { 6010 .dst_type = TCG_TYPE_PTR, 6011 .src_type = TCG_TYPE_PTR, 6012 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6013 }; 6014 const TCGCallArgumentLoc *loc = &info->in[0]; 6015 TCGType type; 6016 unsigned slot; 6017 tcg_target_ulong imm; 6018 6019 /* 6020 * Handle env, which is always first. 6021 */ 6022 ptr_mov.dst = loc->arg_slot; 6023 ptr_mov.src = TCG_AREG0; 6024 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6025 6026 /* 6027 * Handle oi. 6028 */ 6029 imm = ldst->oi; 6030 loc = &info->in[next_arg]; 6031 type = TCG_TYPE_I32; 6032 switch (loc->kind) { 6033 case TCG_CALL_ARG_NORMAL: 6034 break; 6035 case TCG_CALL_ARG_EXTEND_U: 6036 case TCG_CALL_ARG_EXTEND_S: 6037 /* No extension required for MemOpIdx. */ 6038 tcg_debug_assert(imm <= INT32_MAX); 6039 type = TCG_TYPE_REG; 6040 break; 6041 default: 6042 g_assert_not_reached(); 6043 } 6044 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6045 next_arg++; 6046 6047 /* 6048 * Handle ra. 6049 */ 6050 loc = &info->in[next_arg]; 6051 slot = loc->arg_slot; 6052 if (parm->ra_gen) { 6053 int arg_reg = -1; 6054 TCGReg ra_reg; 6055 6056 if (arg_slot_reg_p(slot)) { 6057 arg_reg = tcg_target_call_iarg_regs[slot]; 6058 } 6059 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6060 6061 ptr_mov.dst = slot; 6062 ptr_mov.src = ra_reg; 6063 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6064 } else { 6065 imm = (uintptr_t)ldst->raddr; 6066 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6067 } 6068 } 6069 6070 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6071 const TCGCallArgumentLoc *loc, 6072 TCGType dst_type, TCGType src_type, 6073 TCGReg lo, TCGReg hi) 6074 { 6075 MemOp reg_mo; 6076 6077 if (dst_type <= TCG_TYPE_REG) { 6078 MemOp src_ext; 6079 6080 switch (loc->kind) { 6081 case TCG_CALL_ARG_NORMAL: 6082 src_ext = src_type == TCG_TYPE_I32 ? 
MO_32 : MO_64; 6083 break; 6084 case TCG_CALL_ARG_EXTEND_U: 6085 dst_type = TCG_TYPE_REG; 6086 src_ext = MO_UL; 6087 break; 6088 case TCG_CALL_ARG_EXTEND_S: 6089 dst_type = TCG_TYPE_REG; 6090 src_ext = MO_SL; 6091 break; 6092 default: 6093 g_assert_not_reached(); 6094 } 6095 6096 mov[0].dst = loc->arg_slot; 6097 mov[0].dst_type = dst_type; 6098 mov[0].src = lo; 6099 mov[0].src_type = src_type; 6100 mov[0].src_ext = src_ext; 6101 return 1; 6102 } 6103 6104 if (TCG_TARGET_REG_BITS == 32) { 6105 assert(dst_type == TCG_TYPE_I64); 6106 reg_mo = MO_32; 6107 } else { 6108 assert(dst_type == TCG_TYPE_I128); 6109 reg_mo = MO_64; 6110 } 6111 6112 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6113 mov[0].src = lo; 6114 mov[0].dst_type = TCG_TYPE_REG; 6115 mov[0].src_type = TCG_TYPE_REG; 6116 mov[0].src_ext = reg_mo; 6117 6118 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6119 mov[1].src = hi; 6120 mov[1].dst_type = TCG_TYPE_REG; 6121 mov[1].src_type = TCG_TYPE_REG; 6122 mov[1].src_ext = reg_mo; 6123 6124 return 2; 6125 } 6126 6127 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6128 const TCGLdstHelperParam *parm) 6129 { 6130 const TCGHelperInfo *info; 6131 const TCGCallArgumentLoc *loc; 6132 TCGMovExtend mov[2]; 6133 unsigned next_arg, nmov; 6134 MemOp mop = get_memop(ldst->oi); 6135 6136 switch (mop & MO_SIZE) { 6137 case MO_8: 6138 case MO_16: 6139 case MO_32: 6140 info = &info_helper_ld32_mmu; 6141 break; 6142 case MO_64: 6143 info = &info_helper_ld64_mmu; 6144 break; 6145 case MO_128: 6146 info = &info_helper_ld128_mmu; 6147 break; 6148 default: 6149 g_assert_not_reached(); 6150 } 6151 6152 /* Defer env argument. */ 6153 next_arg = 1; 6154 6155 loc = &info->in[next_arg]; 6156 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6157 /* 6158 * 32-bit host with 32-bit guest: zero-extend the guest address 6159 * to 64-bits for the helper by storing the low part, then 6160 * load a zero for the high part. 6161 */ 6162 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6163 TCG_TYPE_I32, TCG_TYPE_I32, 6164 ldst->addr_reg, -1); 6165 tcg_out_helper_load_slots(s, 1, mov, parm); 6166 6167 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6168 TCG_TYPE_I32, 0, parm); 6169 next_arg += 2; 6170 } else { 6171 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6172 ldst->addr_reg, -1); 6173 tcg_out_helper_load_slots(s, nmov, mov, parm); 6174 next_arg += nmov; 6175 } 6176 6177 switch (info->out_kind) { 6178 case TCG_CALL_RET_NORMAL: 6179 case TCG_CALL_RET_BY_VEC: 6180 break; 6181 case TCG_CALL_RET_BY_REF: 6182 /* 6183 * The return reference is in the first argument slot. 6184 * We need memory in which to return: re-use the top of stack. 
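 * (This re-used area is the outgoing-argument space at
 * TCG_TARGET_CALL_STACK_OFFSET; after the call, tcg_out_ld_helper_ret()
 * reloads the 128-bit result from the same stack offsets.)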
6185 */ 6186 { 6187 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6188 6189 if (arg_slot_reg_p(0)) { 6190 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6191 TCG_REG_CALL_STACK, ofs_slot0); 6192 } else { 6193 tcg_debug_assert(parm->ntmp != 0); 6194 tcg_out_addi_ptr(s, parm->tmp[0], 6195 TCG_REG_CALL_STACK, ofs_slot0); 6196 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6197 TCG_REG_CALL_STACK, ofs_slot0); 6198 } 6199 } 6200 break; 6201 default: 6202 g_assert_not_reached(); 6203 } 6204 6205 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6206 } 6207 6208 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6209 bool load_sign, 6210 const TCGLdstHelperParam *parm) 6211 { 6212 MemOp mop = get_memop(ldst->oi); 6213 TCGMovExtend mov[2]; 6214 int ofs_slot0; 6215 6216 switch (ldst->type) { 6217 case TCG_TYPE_I64: 6218 if (TCG_TARGET_REG_BITS == 32) { 6219 break; 6220 } 6221 /* fall through */ 6222 6223 case TCG_TYPE_I32: 6224 mov[0].dst = ldst->datalo_reg; 6225 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6226 mov[0].dst_type = ldst->type; 6227 mov[0].src_type = TCG_TYPE_REG; 6228 6229 /* 6230 * If load_sign, then we allowed the helper to perform the 6231 * appropriate sign extension to tcg_target_ulong, and all 6232 * we need now is a plain move. 6233 * 6234 * If they do not, then we expect the relevant extension 6235 * instruction to be no more expensive than a move, and 6236 * we thus save the icache etc by only using one of two 6237 * helper functions. 6238 */ 6239 if (load_sign || !(mop & MO_SIGN)) { 6240 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6241 mov[0].src_ext = MO_32; 6242 } else { 6243 mov[0].src_ext = MO_64; 6244 } 6245 } else { 6246 mov[0].src_ext = mop & MO_SSIZE; 6247 } 6248 tcg_out_movext1(s, mov); 6249 return; 6250 6251 case TCG_TYPE_I128: 6252 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6253 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6254 switch (TCG_TARGET_CALL_RET_I128) { 6255 case TCG_CALL_RET_NORMAL: 6256 break; 6257 case TCG_CALL_RET_BY_VEC: 6258 tcg_out_st(s, TCG_TYPE_V128, 6259 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6260 TCG_REG_CALL_STACK, ofs_slot0); 6261 /* fall through */ 6262 case TCG_CALL_RET_BY_REF: 6263 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6264 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6265 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6266 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6267 return; 6268 default: 6269 g_assert_not_reached(); 6270 } 6271 break; 6272 6273 default: 6274 g_assert_not_reached(); 6275 } 6276 6277 mov[0].dst = ldst->datalo_reg; 6278 mov[0].src = 6279 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6280 mov[0].dst_type = TCG_TYPE_REG; 6281 mov[0].src_type = TCG_TYPE_REG; 6282 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6283 6284 mov[1].dst = ldst->datahi_reg; 6285 mov[1].src = 6286 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6287 mov[1].dst_type = TCG_TYPE_REG; 6288 mov[1].src_type = TCG_TYPE_REG; 6289 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6290 6291 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6292 } 6293 6294 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6295 const TCGLdstHelperParam *parm) 6296 { 6297 const TCGHelperInfo *info; 6298 const TCGCallArgumentLoc *loc; 6299 TCGMovExtend mov[4]; 6300 TCGType data_type; 6301 unsigned next_arg, nmov, n; 6302 MemOp mop = get_memop(ldst->oi); 6303 6304 switch (mop & MO_SIZE) { 6305 case MO_8: 6306 case MO_16: 6307 case MO_32: 6308 info = &info_helper_st32_mmu; 6309 data_type = TCG_TYPE_I32; 6310 break; 6311 case MO_64: 6312 info = &info_helper_st64_mmu; 6313 data_type = TCG_TYPE_I64; 6314 break; 6315 case MO_128: 6316 info = &info_helper_st128_mmu; 6317 data_type = TCG_TYPE_I128; 6318 break; 6319 default: 6320 g_assert_not_reached(); 6321 } 6322 6323 /* Defer env argument. */ 6324 next_arg = 1; 6325 nmov = 0; 6326 6327 /* Handle addr argument. */ 6328 loc = &info->in[next_arg]; 6329 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6330 if (TCG_TARGET_REG_BITS == 32) { 6331 /* 6332 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6333 * to 64-bits for the helper by storing the low part. Later, 6334 * after we have processed the register inputs, we will load a 6335 * zero for the high part. 6336 */ 6337 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6338 TCG_TYPE_I32, TCG_TYPE_I32, 6339 ldst->addr_reg, -1); 6340 next_arg += 2; 6341 nmov += 1; 6342 } else { 6343 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6344 ldst->addr_reg, -1); 6345 next_arg += n; 6346 nmov += n; 6347 } 6348 6349 /* Handle data argument. */ 6350 loc = &info->in[next_arg]; 6351 switch (loc->kind) { 6352 case TCG_CALL_ARG_NORMAL: 6353 case TCG_CALL_ARG_EXTEND_U: 6354 case TCG_CALL_ARG_EXTEND_S: 6355 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6356 ldst->datalo_reg, ldst->datahi_reg); 6357 next_arg += n; 6358 nmov += n; 6359 tcg_out_helper_load_slots(s, nmov, mov, parm); 6360 break; 6361 6362 case TCG_CALL_ARG_BY_REF: 6363 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6364 tcg_debug_assert(data_type == TCG_TYPE_I128); 6365 tcg_out_st(s, TCG_TYPE_I64, 6366 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6367 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6368 tcg_out_st(s, TCG_TYPE_I64, 6369 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6370 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6371 6372 tcg_out_helper_load_slots(s, nmov, mov, parm); 6373 6374 if (arg_slot_reg_p(loc->arg_slot)) { 6375 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6376 TCG_REG_CALL_STACK, 6377 arg_slot_stk_ofs(loc->ref_slot)); 6378 } else { 6379 tcg_debug_assert(parm->ntmp != 0); 6380 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6381 arg_slot_stk_ofs(loc->ref_slot)); 6382 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6383 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6384 } 6385 next_arg += 2; 6386 break; 6387 6388 default: 6389 g_assert_not_reached(); 6390 } 6391 6392 if (TCG_TARGET_REG_BITS == 32) { 6393 /* Zero extend the address by loading a zero for the high part. 
*/ 6394 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6395 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6396 } 6397 6398 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6399 } 6400 6401 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6402 { 6403 int i, start_words, num_insns; 6404 TCGOp *op; 6405 6406 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6407 && qemu_log_in_addr_range(pc_start))) { 6408 FILE *logfile = qemu_log_trylock(); 6409 if (logfile) { 6410 fprintf(logfile, "OP:\n"); 6411 tcg_dump_ops(s, logfile, false); 6412 fprintf(logfile, "\n"); 6413 qemu_log_unlock(logfile); 6414 } 6415 } 6416 6417 #ifdef CONFIG_DEBUG_TCG 6418 /* Ensure all labels referenced have been emitted. */ 6419 { 6420 TCGLabel *l; 6421 bool error = false; 6422 6423 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6424 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6425 qemu_log_mask(CPU_LOG_TB_OP, 6426 "$L%d referenced but not present.\n", l->id); 6427 error = true; 6428 } 6429 } 6430 assert(!error); 6431 } 6432 #endif 6433 6434 /* Do not reuse any EBB that may be allocated within the TB. */ 6435 tcg_temp_ebb_reset_freed(s); 6436 6437 tcg_optimize(s); 6438 6439 reachable_code_pass(s); 6440 liveness_pass_0(s); 6441 liveness_pass_1(s); 6442 6443 if (s->nb_indirects > 0) { 6444 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6445 && qemu_log_in_addr_range(pc_start))) { 6446 FILE *logfile = qemu_log_trylock(); 6447 if (logfile) { 6448 fprintf(logfile, "OP before indirect lowering:\n"); 6449 tcg_dump_ops(s, logfile, false); 6450 fprintf(logfile, "\n"); 6451 qemu_log_unlock(logfile); 6452 } 6453 } 6454 6455 /* Replace indirect temps with direct temps. */ 6456 if (liveness_pass_2(s)) { 6457 /* If changes were made, re-run liveness. */ 6458 liveness_pass_1(s); 6459 } 6460 } 6461 6462 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6463 && qemu_log_in_addr_range(pc_start))) { 6464 FILE *logfile = qemu_log_trylock(); 6465 if (logfile) { 6466 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6467 tcg_dump_ops(s, logfile, true); 6468 fprintf(logfile, "\n"); 6469 qemu_log_unlock(logfile); 6470 } 6471 } 6472 6473 /* Initialize goto_tb jump offsets. */ 6474 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6475 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6476 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6477 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6478 6479 tcg_reg_alloc_start(s); 6480 6481 /* 6482 * Reset the buffer pointers when restarting after overflow. 6483 * TODO: Move this into translate-all.c with the rest of the 6484 * buffer management. Having only this done here is confusing. 6485 */ 6486 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6487 s->code_ptr = s->code_buf; 6488 s->data_gen_ptr = NULL; 6489 6490 QSIMPLEQ_INIT(&s->ldst_labels); 6491 s->pool_labels = NULL; 6492 6493 start_words = s->insn_start_words; 6494 s->gen_insn_data = 6495 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6496 6497 tcg_out_tb_start(s); 6498 6499 num_insns = -1; 6500 QTAILQ_FOREACH(op, &s->ops, link) { 6501 TCGOpcode opc = op->opc; 6502 6503 switch (opc) { 6504 case INDEX_op_mov: 6505 case INDEX_op_mov_vec: 6506 tcg_reg_alloc_mov(s, op); 6507 break; 6508 case INDEX_op_dup_vec: 6509 tcg_reg_alloc_dup(s, op); 6510 break; 6511 case INDEX_op_insn_start: 6512 if (num_insns >= 0) { 6513 size_t off = tcg_current_code_size(s); 6514 s->gen_insn_end_off[num_insns] = off; 6515 /* Assert that we do not overflow our stored offset. 
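                   (gen_insn_end_off[] entries are uint16_t, which is why
                   total TB code size is also checked against UINT16_MAX below.)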
*/ 6516 assert(s->gen_insn_end_off[num_insns] == off); 6517 } 6518 num_insns++; 6519 for (i = 0; i < start_words; ++i) { 6520 s->gen_insn_data[num_insns * start_words + i] = 6521 tcg_get_insn_start_param(op, i); 6522 } 6523 break; 6524 case INDEX_op_discard: 6525 temp_dead(s, arg_temp(op->args[0])); 6526 break; 6527 case INDEX_op_set_label: 6528 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6529 tcg_out_label(s, arg_label(op->args[0])); 6530 break; 6531 case INDEX_op_call: 6532 tcg_reg_alloc_call(s, op); 6533 break; 6534 case INDEX_op_exit_tb: 6535 tcg_out_exit_tb(s, op->args[0]); 6536 break; 6537 case INDEX_op_goto_tb: 6538 tcg_out_goto_tb(s, op->args[0]); 6539 break; 6540 case INDEX_op_dup2_vec: 6541 if (tcg_reg_alloc_dup2(s, op)) { 6542 break; 6543 } 6544 /* fall through */ 6545 default: 6546 /* Sanity check that we've not introduced any unhandled opcodes. */ 6547 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6548 TCGOP_FLAGS(op))); 6549 /* Note: in order to speed up the code, it would be much 6550 faster to have specialized register allocator functions for 6551 some common argument patterns */ 6552 tcg_reg_alloc_op(s, op); 6553 break; 6554 } 6555 /* Test for (pending) buffer overflow. The assumption is that any 6556 one operation beginning below the high water mark cannot overrun 6557 the buffer completely. Thus we can test for overflow after 6558 generating code without having to check during generation. */ 6559 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6560 return -1; 6561 } 6562 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6563 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6564 return -2; 6565 } 6566 } 6567 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6568 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6569 6570 /* Generate TB finalization at the end of block */ 6571 i = tcg_out_ldst_finalize(s); 6572 if (i < 0) { 6573 return i; 6574 } 6575 i = tcg_out_pool_finalize(s); 6576 if (i < 0) { 6577 return i; 6578 } 6579 if (!tcg_resolve_relocs(s)) { 6580 return -2; 6581 } 6582 6583 #ifndef CONFIG_TCG_INTERPRETER 6584 /* flush instruction cache */ 6585 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6586 (uintptr_t)s->code_buf, 6587 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6588 #endif 6589 6590 return tcg_current_code_size(s); 6591 } 6592 6593 #ifdef ELF_HOST_MACHINE 6594 /* In order to use this feature, the backend needs to do three things: 6595 6596 (1) Define ELF_HOST_MACHINE to indicate both what value to 6597 put into the ELF image and to indicate support for the feature. 6598 6599 (2) Define tcg_register_jit. This should create a buffer containing 6600 the contents of a .debug_frame section that describes the post- 6601 prologue unwind info for the tcg machine. 6602 6603 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6604 */ 6605 6606 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. 
*/ 6607 typedef enum { 6608 JIT_NOACTION = 0, 6609 JIT_REGISTER_FN, 6610 JIT_UNREGISTER_FN 6611 } jit_actions_t; 6612 6613 struct jit_code_entry { 6614 struct jit_code_entry *next_entry; 6615 struct jit_code_entry *prev_entry; 6616 const void *symfile_addr; 6617 uint64_t symfile_size; 6618 }; 6619 6620 struct jit_descriptor { 6621 uint32_t version; 6622 uint32_t action_flag; 6623 struct jit_code_entry *relevant_entry; 6624 struct jit_code_entry *first_entry; 6625 }; 6626 6627 void __jit_debug_register_code(void) __attribute__((noinline)); 6628 void __jit_debug_register_code(void) 6629 { 6630 asm(""); 6631 } 6632 6633 /* Must statically initialize the version, because GDB may check 6634 the version before we can set it. */ 6635 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6636 6637 /* End GDB interface. */ 6638 6639 static int find_string(const char *strtab, const char *str) 6640 { 6641 const char *p = strtab + 1; 6642 6643 while (1) { 6644 if (strcmp(p, str) == 0) { 6645 return p - strtab; 6646 } 6647 p += strlen(p) + 1; 6648 } 6649 } 6650 6651 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6652 const void *debug_frame, 6653 size_t debug_frame_size) 6654 { 6655 struct __attribute__((packed)) DebugInfo { 6656 uint32_t len; 6657 uint16_t version; 6658 uint32_t abbrev; 6659 uint8_t ptr_size; 6660 uint8_t cu_die; 6661 uint16_t cu_lang; 6662 uintptr_t cu_low_pc; 6663 uintptr_t cu_high_pc; 6664 uint8_t fn_die; 6665 char fn_name[16]; 6666 uintptr_t fn_low_pc; 6667 uintptr_t fn_high_pc; 6668 uint8_t cu_eoc; 6669 }; 6670 6671 struct ElfImage { 6672 ElfW(Ehdr) ehdr; 6673 ElfW(Phdr) phdr; 6674 ElfW(Shdr) shdr[7]; 6675 ElfW(Sym) sym[2]; 6676 struct DebugInfo di; 6677 uint8_t da[24]; 6678 char str[80]; 6679 }; 6680 6681 struct ElfImage *img; 6682 6683 static const struct ElfImage img_template = { 6684 .ehdr = { 6685 .e_ident[EI_MAG0] = ELFMAG0, 6686 .e_ident[EI_MAG1] = ELFMAG1, 6687 .e_ident[EI_MAG2] = ELFMAG2, 6688 .e_ident[EI_MAG3] = ELFMAG3, 6689 .e_ident[EI_CLASS] = ELF_CLASS, 6690 .e_ident[EI_DATA] = ELF_DATA, 6691 .e_ident[EI_VERSION] = EV_CURRENT, 6692 .e_type = ET_EXEC, 6693 .e_machine = ELF_HOST_MACHINE, 6694 .e_version = EV_CURRENT, 6695 .e_phoff = offsetof(struct ElfImage, phdr), 6696 .e_shoff = offsetof(struct ElfImage, shdr), 6697 .e_ehsize = sizeof(ElfW(Shdr)), 6698 .e_phentsize = sizeof(ElfW(Phdr)), 6699 .e_phnum = 1, 6700 .e_shentsize = sizeof(ElfW(Shdr)), 6701 .e_shnum = ARRAY_SIZE(img->shdr), 6702 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6703 #ifdef ELF_HOST_FLAGS 6704 .e_flags = ELF_HOST_FLAGS, 6705 #endif 6706 #ifdef ELF_OSABI 6707 .e_ident[EI_OSABI] = ELF_OSABI, 6708 #endif 6709 }, 6710 .phdr = { 6711 .p_type = PT_LOAD, 6712 .p_flags = PF_X, 6713 }, 6714 .shdr = { 6715 [0] = { .sh_type = SHT_NULL }, 6716 /* Trick: The contents of code_gen_buffer are not present in 6717 this fake ELF file; that got allocated elsewhere. Therefore 6718 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6719 will not look for contents. We can record any address. 
*/ 6720 [1] = { /* .text */ 6721 .sh_type = SHT_NOBITS, 6722 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6723 }, 6724 [2] = { /* .debug_info */ 6725 .sh_type = SHT_PROGBITS, 6726 .sh_offset = offsetof(struct ElfImage, di), 6727 .sh_size = sizeof(struct DebugInfo), 6728 }, 6729 [3] = { /* .debug_abbrev */ 6730 .sh_type = SHT_PROGBITS, 6731 .sh_offset = offsetof(struct ElfImage, da), 6732 .sh_size = sizeof(img->da), 6733 }, 6734 [4] = { /* .debug_frame */ 6735 .sh_type = SHT_PROGBITS, 6736 .sh_offset = sizeof(struct ElfImage), 6737 }, 6738 [5] = { /* .symtab */ 6739 .sh_type = SHT_SYMTAB, 6740 .sh_offset = offsetof(struct ElfImage, sym), 6741 .sh_size = sizeof(img->sym), 6742 .sh_info = 1, 6743 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6744 .sh_entsize = sizeof(ElfW(Sym)), 6745 }, 6746 [6] = { /* .strtab */ 6747 .sh_type = SHT_STRTAB, 6748 .sh_offset = offsetof(struct ElfImage, str), 6749 .sh_size = sizeof(img->str), 6750 } 6751 }, 6752 .sym = { 6753 [1] = { /* code_gen_buffer */ 6754 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6755 .st_shndx = 1, 6756 } 6757 }, 6758 .di = { 6759 .len = sizeof(struct DebugInfo) - 4, 6760 .version = 2, 6761 .ptr_size = sizeof(void *), 6762 .cu_die = 1, 6763 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6764 .fn_die = 2, 6765 .fn_name = "code_gen_buffer" 6766 }, 6767 .da = { 6768 1, /* abbrev number (the cu) */ 6769 0x11, 1, /* DW_TAG_compile_unit, has children */ 6770 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6771 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6772 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6773 0, 0, /* end of abbrev */ 6774 2, /* abbrev number (the fn) */ 6775 0x2e, 0, /* DW_TAG_subprogram, no children */ 6776 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6777 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6778 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6779 0, 0, /* end of abbrev */ 6780 0 /* no more abbrev */ 6781 }, 6782 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6783 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 6784 }; 6785 6786 /* We only need a single jit entry; statically allocate it. */ 6787 static struct jit_code_entry one_entry; 6788 6789 uintptr_t buf = (uintptr_t)buf_ptr; 6790 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6791 DebugFrameHeader *dfh; 6792 6793 img = g_malloc(img_size); 6794 *img = img_template; 6795 6796 img->phdr.p_vaddr = buf; 6797 img->phdr.p_paddr = buf; 6798 img->phdr.p_memsz = buf_size; 6799 6800 img->shdr[1].sh_name = find_string(img->str, ".text"); 6801 img->shdr[1].sh_addr = buf; 6802 img->shdr[1].sh_size = buf_size; 6803 6804 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6805 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6806 6807 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6808 img->shdr[4].sh_size = debug_frame_size; 6809 6810 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6811 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6812 6813 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6814 img->sym[1].st_value = buf; 6815 img->sym[1].st_size = buf_size; 6816 6817 img->di.cu_low_pc = buf; 6818 img->di.cu_high_pc = buf + buf_size; 6819 img->di.fn_low_pc = buf; 6820 img->di.fn_high_pc = buf + buf_size; 6821 6822 dfh = (DebugFrameHeader *)(img + 1); 6823 memcpy(dfh, debug_frame, debug_frame_size); 6824 dfh->fde.func_start = buf; 6825 dfh->fde.func_len = buf_size; 6826 6827 #ifdef DEBUG_JIT 6828 /* Enable this block to be able to debug the ELF image file creation. 
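   The image is written as "qemu.jit" under g_get_tmp_dir() (typically /tmp).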
6829 One can use readelf, objdump, or other inspection utilities. */ 6830 { 6831 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6832 FILE *f = fopen(jit, "w+b"); 6833 if (f) { 6834 if (fwrite(img, img_size, 1, f) != img_size) { 6835 /* Avoid stupid unused return value warning for fwrite. */ 6836 } 6837 fclose(f); 6838 } 6839 } 6840 #endif 6841 6842 one_entry.symfile_addr = img; 6843 one_entry.symfile_size = img_size; 6844 6845 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6846 __jit_debug_descriptor.relevant_entry = &one_entry; 6847 __jit_debug_descriptor.first_entry = &one_entry; 6848 __jit_debug_register_code(); 6849 } 6850 #else 6851 /* No support for the feature. Provide the entry point expected by exec.c, 6852 and implement the internal function we declared earlier. */ 6853 6854 static void tcg_register_jit_int(const void *buf, size_t size, 6855 const void *debug_frame, 6856 size_t debug_frame_size) 6857 { 6858 } 6859 6860 void tcg_register_jit(const void *buf, size_t buf_size) 6861 { 6862 } 6863 #endif /* ELF_HOST_MACHINE */ 6864 6865 #if !TCG_TARGET_MAYBE_vec 6866 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6867 { 6868 g_assert_not_reached(); 6869 } 6870 #endif 6871
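/*
 * Usage sketch for the GDB JIT interface above: GDB places a breakpoint in
 * __jit_debug_register_code() and, when it fires, reads __jit_debug_descriptor
 * and loads the in-memory ELF image at symfile_addr/symfile_size as a symbol
 * file.  Once registered, a backtrace taken while stopped inside generated
 * code can resolve the JIT frame (addresses below are illustrative):
 *
 *     (gdb) bt
 *     #0  0x00007fffe8000123 in code_gen_buffer ()
 *     #1  cpu_tb_exec (...) at accel/tcg/cpu-exec.c
 *     ...
 *
 * Unwinding out of frame #0 relies on the .debug_frame data supplied by the
 * backend via tcg_register_jit().
 */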