/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
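
/*
 * Raw emission helpers follow.  Illustrative note: values are written in
 * units of TCG_TARGET_INSN_UNIT_SIZE bytes.  A host with 4-byte instruction
 * units stores a uint32_t directly in tcg_out32() and advances code_ptr by
 * one unit, while tcg_out64() takes the memcpy path and advances it by two;
 * a host with 1-byte units assembles every multi-byte value via memcpy.
 */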

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
409 */ 410 s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); 411 } 412 413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) 414 { 415 /* 416 * Return the read-execute version of the pointer, for the benefit 417 * of any pc-relative addressing mode. 418 */ 419 return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); 420 } 421 422 static int __attribute__((unused)) 423 tlb_mask_table_ofs(TCGContext *s, int which) 424 { 425 return (offsetof(CPUNegativeOffsetState, tlb.f[which]) - 426 sizeof(CPUNegativeOffsetState)); 427 } 428 429 /* Signal overflow, starting over with fewer guest insns. */ 430 static G_NORETURN 431 void tcg_raise_tb_overflow(TCGContext *s) 432 { 433 siglongjmp(s->jmp_trans, -2); 434 } 435 436 /* 437 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext. 438 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg. 439 * 440 * However, tcg_out_helper_load_slots reuses this field to hold an 441 * argument slot number (which may designate a argument register or an 442 * argument stack slot), converting to TCGReg once all arguments that 443 * are destined for the stack are processed. 444 */ 445 typedef struct TCGMovExtend { 446 unsigned dst; 447 TCGReg src; 448 TCGType dst_type; 449 TCGType src_type; 450 MemOp src_ext; 451 } TCGMovExtend; 452 453 /** 454 * tcg_out_movext -- move and extend 455 * @s: tcg context 456 * @dst_type: integral type for destination 457 * @dst: destination register 458 * @src_type: integral type for source 459 * @src_ext: extension to apply to source 460 * @src: source register 461 * 462 * Move or extend @src into @dst, depending on @src_ext and the types. 463 */ 464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, 465 TCGType src_type, MemOp src_ext, TCGReg src) 466 { 467 switch (src_ext) { 468 case MO_UB: 469 tcg_out_ext8u(s, dst, src); 470 break; 471 case MO_SB: 472 tcg_out_ext8s(s, dst_type, dst, src); 473 break; 474 case MO_UW: 475 tcg_out_ext16u(s, dst, src); 476 break; 477 case MO_SW: 478 tcg_out_ext16s(s, dst_type, dst, src); 479 break; 480 case MO_UL: 481 case MO_SL: 482 if (dst_type == TCG_TYPE_I32) { 483 if (src_type == TCG_TYPE_I32) { 484 tcg_out_mov(s, TCG_TYPE_I32, dst, src); 485 } else { 486 tcg_out_extrl_i64_i32(s, dst, src); 487 } 488 } else if (src_type == TCG_TYPE_I32) { 489 if (src_ext & MO_SIGN) { 490 tcg_out_exts_i32_i64(s, dst, src); 491 } else { 492 tcg_out_extu_i32_i64(s, dst, src); 493 } 494 } else { 495 if (src_ext & MO_SIGN) { 496 tcg_out_ext32s(s, dst, src); 497 } else { 498 tcg_out_ext32u(s, dst, src); 499 } 500 } 501 break; 502 case MO_UQ: 503 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 504 if (dst_type == TCG_TYPE_I32) { 505 tcg_out_extrl_i64_i32(s, dst, src); 506 } else { 507 tcg_out_mov(s, TCG_TYPE_I64, dst, src); 508 } 509 break; 510 default: 511 g_assert_not_reached(); 512 } 513 } 514 515 /* Minor variations on a theme, using a structure. 

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
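
/*
 * Illustrative note: when the two moves form a swap (i1 is A -> B while
 * i2 is B -> A), the code above either exchanges the registers and then
 * extends each value in place, or, when the backend has no xchg, copies
 * one source into @scratch first so that neither input is clobbered.
 */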

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
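
/*
 * Illustrative note: keeping the pool sorted (larger entries first, then
 * by content) places identical constants next to each other, so that
 * tcg_out_pool_finalize() below can emit each distinct value only once
 * and point every user's relocation at the same data slot.
 */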

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
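
/*
 * Illustrative example: a line such as C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h is expanded three times below -- first into the
 * enumerator c_o1_i2_r_r_ri, then into the constraint-string set
 * { 1, 2, { "r", "r", "ri" } }, and finally into the value a backend's
 * tcg_target_op_def() returns for an opcode using that set.
 */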

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
975 */ 976 typedef struct TCGOutOp { 977 TCGConstraintSetIndex static_constraint; 978 TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); 979 } TCGOutOp; 980 981 typedef struct TCGOutOpBinary { 982 TCGOutOp base; 983 void (*out_rrr)(TCGContext *s, TCGType type, 984 TCGReg a0, TCGReg a1, TCGReg a2); 985 void (*out_rri)(TCGContext *s, TCGType type, 986 TCGReg a0, TCGReg a1, tcg_target_long a2); 987 } TCGOutOpBinary; 988 989 typedef struct TCGOutOpDivRem { 990 TCGOutOp base; 991 void (*out_rr01r)(TCGContext *s, TCGType type, 992 TCGReg a0, TCGReg a1, TCGReg a4); 993 } TCGOutOpDivRem; 994 995 typedef struct TCGOutOpMul2 { 996 TCGOutOp base; 997 void (*out_rrrr)(TCGContext *s, TCGType type, 998 TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3); 999 } TCGOutOpMul2; 1000 1001 typedef struct TCGOutOpUnary { 1002 TCGOutOp base; 1003 void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); 1004 } TCGOutOpUnary; 1005 1006 typedef struct TCGOutOpSetcond { 1007 TCGOutOp base; 1008 void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond, 1009 TCGReg ret, TCGReg a1, TCGReg a2); 1010 void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond, 1011 TCGReg ret, TCGReg a1, tcg_target_long a2); 1012 } TCGOutOpSetcond; 1013 1014 typedef struct TCGOutOpSubtract { 1015 TCGOutOp base; 1016 void (*out_rrr)(TCGContext *s, TCGType type, 1017 TCGReg a0, TCGReg a1, TCGReg a2); 1018 void (*out_rir)(TCGContext *s, TCGType type, 1019 TCGReg a0, tcg_target_long a1, TCGReg a2); 1020 } TCGOutOpSubtract; 1021 1022 #include "tcg-target.c.inc" 1023 1024 #ifndef CONFIG_TCG_INTERPRETER 1025 /* Validate CPUTLBDescFast placement. */ 1026 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - 1027 sizeof(CPUNegativeOffsetState)) 1028 < MIN_TLB_MASK_TABLE_OFS); 1029 #endif 1030 1031 /* 1032 * Register V as the TCGOutOp for O. 1033 * This verifies that V is of type T, otherwise give a nice compiler error. 1034 * This prevents trivial mistakes within each arch/tcg-target.c.inc. 1035 */ 1036 #define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) 1037 1038 /* Register allocation descriptions for every TCGOpcode. 

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx.  See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[].  Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
1188 */ 1189 1190 #if TCG_TARGET_REG_BITS == 32 1191 # define dh_typecode_ttl dh_typecode_i32 1192 #else 1193 # define dh_typecode_ttl dh_typecode_i64 1194 #endif 1195 1196 static TCGHelperInfo info_helper_ld32_mmu = { 1197 .flags = TCG_CALL_NO_WG, 1198 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1199 | dh_typemask(env, 1) 1200 | dh_typemask(i64, 2) /* uint64_t addr */ 1201 | dh_typemask(i32, 3) /* unsigned oi */ 1202 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1203 }; 1204 1205 static TCGHelperInfo info_helper_ld64_mmu = { 1206 .flags = TCG_CALL_NO_WG, 1207 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1208 | dh_typemask(env, 1) 1209 | dh_typemask(i64, 2) /* uint64_t addr */ 1210 | dh_typemask(i32, 3) /* unsigned oi */ 1211 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1212 }; 1213 1214 static TCGHelperInfo info_helper_ld128_mmu = { 1215 .flags = TCG_CALL_NO_WG, 1216 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1217 | dh_typemask(env, 1) 1218 | dh_typemask(i64, 2) /* uint64_t addr */ 1219 | dh_typemask(i32, 3) /* unsigned oi */ 1220 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1221 }; 1222 1223 static TCGHelperInfo info_helper_st32_mmu = { 1224 .flags = TCG_CALL_NO_WG, 1225 .typemask = dh_typemask(void, 0) 1226 | dh_typemask(env, 1) 1227 | dh_typemask(i64, 2) /* uint64_t addr */ 1228 | dh_typemask(i32, 3) /* uint32_t data */ 1229 | dh_typemask(i32, 4) /* unsigned oi */ 1230 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1231 }; 1232 1233 static TCGHelperInfo info_helper_st64_mmu = { 1234 .flags = TCG_CALL_NO_WG, 1235 .typemask = dh_typemask(void, 0) 1236 | dh_typemask(env, 1) 1237 | dh_typemask(i64, 2) /* uint64_t addr */ 1238 | dh_typemask(i64, 3) /* uint64_t data */ 1239 | dh_typemask(i32, 4) /* unsigned oi */ 1240 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1241 }; 1242 1243 static TCGHelperInfo info_helper_st128_mmu = { 1244 .flags = TCG_CALL_NO_WG, 1245 .typemask = dh_typemask(void, 0) 1246 | dh_typemask(env, 1) 1247 | dh_typemask(i64, 2) /* uint64_t addr */ 1248 | dh_typemask(i128, 3) /* Int128 data */ 1249 | dh_typemask(i32, 4) /* unsigned oi */ 1250 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1251 }; 1252 1253 #ifdef CONFIG_TCG_INTERPRETER 1254 static ffi_type *typecode_to_ffi(int argmask) 1255 { 1256 /* 1257 * libffi does not support __int128_t, so we have forced Int128 1258 * to use the structure definition instead of the builtin type. 1259 */ 1260 static ffi_type *ffi_type_i128_elements[3] = { 1261 &ffi_type_uint64, 1262 &ffi_type_uint64, 1263 NULL 1264 }; 1265 static ffi_type ffi_type_i128 = { 1266 .size = 16, 1267 .alignment = __alignof__(Int128), 1268 .type = FFI_TYPE_STRUCT, 1269 .elements = ffi_type_i128_elements, 1270 }; 1271 1272 switch (argmask) { 1273 case dh_typecode_void: 1274 return &ffi_type_void; 1275 case dh_typecode_i32: 1276 return &ffi_type_uint32; 1277 case dh_typecode_s32: 1278 return &ffi_type_sint32; 1279 case dh_typecode_i64: 1280 return &ffi_type_uint64; 1281 case dh_typecode_s64: 1282 return &ffi_type_sint64; 1283 case dh_typecode_ptr: 1284 return &ffi_type_pointer; 1285 case dh_typecode_i128: 1286 return &ffi_type_i128; 1287 } 1288 g_assert_not_reached(); 1289 } 1290 1291 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1292 { 1293 unsigned typemask = info->typemask; 1294 struct { 1295 ffi_cif cif; 1296 ffi_type *args[]; 1297 } *ca; 1298 ffi_status status; 1299 int nargs; 1300 1301 /* Ignoring the return type, find the last non-zero field. 

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
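
/*
 * Illustrative example: on a 64-bit host with six integer argument
 * registers and a TCG_TARGET_CALL_STACK_OFFSET of 0, slots 0-5 are
 * registers (arg_slot_reg_p() returns true) and slot 8 would map to
 * stack offset (8 - 6) * 8 = 16.
 */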
1414 */ 1415 for (int i = 1; i < n; ++i) { 1416 loc[i] = (TCGCallArgumentLoc){ 1417 .kind = TCG_CALL_ARG_BY_REF_N, 1418 .arg_idx = cum->arg_idx, 1419 .tmp_subindex = i, 1420 .ref_slot = cum->ref_slot + i, 1421 }; 1422 } 1423 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1424 cum->ref_slot += n; 1425 } 1426 1427 static void init_call_layout(TCGHelperInfo *info) 1428 { 1429 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1430 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1431 unsigned typemask = info->typemask; 1432 unsigned typecode; 1433 TCGCumulativeArgs cum = { }; 1434 1435 /* 1436 * Parse and place any function return value. 1437 */ 1438 typecode = typemask & 7; 1439 switch (typecode) { 1440 case dh_typecode_void: 1441 info->nr_out = 0; 1442 break; 1443 case dh_typecode_i32: 1444 case dh_typecode_s32: 1445 case dh_typecode_ptr: 1446 info->nr_out = 1; 1447 info->out_kind = TCG_CALL_RET_NORMAL; 1448 break; 1449 case dh_typecode_i64: 1450 case dh_typecode_s64: 1451 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1452 info->out_kind = TCG_CALL_RET_NORMAL; 1453 /* Query the last register now to trigger any assert early. */ 1454 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1455 break; 1456 case dh_typecode_i128: 1457 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1458 info->out_kind = TCG_TARGET_CALL_RET_I128; 1459 switch (TCG_TARGET_CALL_RET_I128) { 1460 case TCG_CALL_RET_NORMAL: 1461 /* Query the last register now to trigger any assert early. */ 1462 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1463 break; 1464 case TCG_CALL_RET_BY_VEC: 1465 /* Query the single register now to trigger any assert early. */ 1466 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1467 break; 1468 case TCG_CALL_RET_BY_REF: 1469 /* 1470 * Allocate the first argument to the output. 1471 * We don't need to store this anywhere, just make it 1472 * unavailable for use in the input loop below. 1473 */ 1474 cum.arg_slot = 1; 1475 break; 1476 default: 1477 qemu_build_not_reached(); 1478 } 1479 break; 1480 default: 1481 g_assert_not_reached(); 1482 } 1483 1484 /* 1485 * Parse and place function arguments. 
1486 */ 1487 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1488 TCGCallArgumentKind kind; 1489 TCGType type; 1490 1491 typecode = typemask & 7; 1492 switch (typecode) { 1493 case dh_typecode_i32: 1494 case dh_typecode_s32: 1495 type = TCG_TYPE_I32; 1496 break; 1497 case dh_typecode_i64: 1498 case dh_typecode_s64: 1499 type = TCG_TYPE_I64; 1500 break; 1501 case dh_typecode_ptr: 1502 type = TCG_TYPE_PTR; 1503 break; 1504 case dh_typecode_i128: 1505 type = TCG_TYPE_I128; 1506 break; 1507 default: 1508 g_assert_not_reached(); 1509 } 1510 1511 switch (type) { 1512 case TCG_TYPE_I32: 1513 switch (TCG_TARGET_CALL_ARG_I32) { 1514 case TCG_CALL_ARG_EVEN: 1515 layout_arg_even(&cum); 1516 /* fall through */ 1517 case TCG_CALL_ARG_NORMAL: 1518 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1519 break; 1520 case TCG_CALL_ARG_EXTEND: 1521 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1522 layout_arg_1(&cum, info, kind); 1523 break; 1524 default: 1525 qemu_build_not_reached(); 1526 } 1527 break; 1528 1529 case TCG_TYPE_I64: 1530 switch (TCG_TARGET_CALL_ARG_I64) { 1531 case TCG_CALL_ARG_EVEN: 1532 layout_arg_even(&cum); 1533 /* fall through */ 1534 case TCG_CALL_ARG_NORMAL: 1535 if (TCG_TARGET_REG_BITS == 32) { 1536 layout_arg_normal_n(&cum, info, 2); 1537 } else { 1538 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1539 } 1540 break; 1541 default: 1542 qemu_build_not_reached(); 1543 } 1544 break; 1545 1546 case TCG_TYPE_I128: 1547 switch (TCG_TARGET_CALL_ARG_I128) { 1548 case TCG_CALL_ARG_EVEN: 1549 layout_arg_even(&cum); 1550 /* fall through */ 1551 case TCG_CALL_ARG_NORMAL: 1552 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1553 break; 1554 case TCG_CALL_ARG_BY_REF: 1555 layout_arg_by_ref(&cum, info); 1556 break; 1557 default: 1558 qemu_build_not_reached(); 1559 } 1560 break; 1561 1562 default: 1563 g_assert_not_reached(); 1564 } 1565 } 1566 info->nr_in = cum.info_in_idx; 1567 1568 /* Validate that we didn't overrun the input array. */ 1569 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1570 /* Validate the backend has enough argument space. */ 1571 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1572 1573 /* 1574 * Relocate the "ref_slot" area to the end of the parameters. 1575 * Minimizing this stack offset helps code size for x86, 1576 * which has a signed 8-bit offset encoding. 
1577 */ 1578 if (cum.ref_slot != 0) { 1579 int ref_base = 0; 1580 1581 if (cum.arg_slot > max_reg_slots) { 1582 int align = __alignof(Int128) / sizeof(tcg_target_long); 1583 1584 ref_base = cum.arg_slot - max_reg_slots; 1585 if (align > 1) { 1586 ref_base = ROUND_UP(ref_base, align); 1587 } 1588 } 1589 assert(ref_base + cum.ref_slot <= max_stk_slots); 1590 ref_base += max_reg_slots; 1591 1592 if (ref_base != 0) { 1593 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1594 TCGCallArgumentLoc *loc = &info->in[i]; 1595 switch (loc->kind) { 1596 case TCG_CALL_ARG_BY_REF: 1597 case TCG_CALL_ARG_BY_REF_N: 1598 loc->ref_slot += ref_base; 1599 break; 1600 default: 1601 break; 1602 } 1603 } 1604 } 1605 } 1606 } 1607 1608 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1609 static void process_constraint_sets(void); 1610 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1611 TCGReg reg, const char *name); 1612 1613 static void tcg_context_init(unsigned max_threads) 1614 { 1615 TCGContext *s = &tcg_init_ctx; 1616 int n, i; 1617 TCGTemp *ts; 1618 1619 memset(s, 0, sizeof(*s)); 1620 s->nb_globals = 0; 1621 1622 init_call_layout(&info_helper_ld32_mmu); 1623 init_call_layout(&info_helper_ld64_mmu); 1624 init_call_layout(&info_helper_ld128_mmu); 1625 init_call_layout(&info_helper_st32_mmu); 1626 init_call_layout(&info_helper_st64_mmu); 1627 init_call_layout(&info_helper_st128_mmu); 1628 1629 tcg_target_init(s); 1630 process_constraint_sets(); 1631 1632 /* Reverse the order of the saved registers, assuming they're all at 1633 the start of tcg_target_reg_alloc_order. */ 1634 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1635 int r = tcg_target_reg_alloc_order[n]; 1636 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1637 break; 1638 } 1639 } 1640 for (i = 0; i < n; ++i) { 1641 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1642 } 1643 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1644 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1645 } 1646 1647 tcg_ctx = s; 1648 /* 1649 * In user-mode we simply share the init context among threads, since we 1650 * use a single region. See the documentation tcg_region_init() for the 1651 * reasoning behind this. 1652 * In system-mode we will have at most max_threads TCG threads. 1653 */ 1654 #ifdef CONFIG_USER_ONLY 1655 tcg_ctxs = &tcg_ctx; 1656 tcg_cur_ctxs = 1; 1657 tcg_max_ctxs = 1; 1658 #else 1659 tcg_max_ctxs = max_threads; 1660 tcg_ctxs = g_new0(TCGContext *, max_threads); 1661 #endif 1662 1663 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1664 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1665 tcg_env = temp_tcgv_ptr(ts); 1666 } 1667 1668 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1669 { 1670 tcg_context_init(max_threads); 1671 tcg_region_init(tb_size, splitwx, max_threads); 1672 } 1673 1674 /* 1675 * Allocate TBs right before their corresponding translated code, making 1676 * sure that TBs and code are on different cache lines. 
1677 */ 1678 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1679 { 1680 uintptr_t align = qemu_icache_linesize; 1681 TranslationBlock *tb; 1682 void *next; 1683 1684 retry: 1685 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1686 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1687 1688 if (unlikely(next > s->code_gen_highwater)) { 1689 if (tcg_region_alloc(s)) { 1690 return NULL; 1691 } 1692 goto retry; 1693 } 1694 qatomic_set(&s->code_gen_ptr, next); 1695 return tb; 1696 } 1697 1698 void tcg_prologue_init(void) 1699 { 1700 TCGContext *s = tcg_ctx; 1701 size_t prologue_size; 1702 1703 s->code_ptr = s->code_gen_ptr; 1704 s->code_buf = s->code_gen_ptr; 1705 s->data_gen_ptr = NULL; 1706 1707 #ifndef CONFIG_TCG_INTERPRETER 1708 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1709 #endif 1710 1711 s->pool_labels = NULL; 1712 1713 qemu_thread_jit_write(); 1714 /* Generate the prologue. */ 1715 tcg_target_qemu_prologue(s); 1716 1717 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1718 { 1719 int result = tcg_out_pool_finalize(s); 1720 tcg_debug_assert(result == 0); 1721 } 1722 1723 prologue_size = tcg_current_code_size(s); 1724 perf_report_prologue(s->code_gen_ptr, prologue_size); 1725 1726 #ifndef CONFIG_TCG_INTERPRETER 1727 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1728 (uintptr_t)s->code_buf, prologue_size); 1729 #endif 1730 1731 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1732 FILE *logfile = qemu_log_trylock(); 1733 if (logfile) { 1734 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1735 if (s->data_gen_ptr) { 1736 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1737 size_t data_size = prologue_size - code_size; 1738 size_t i; 1739 1740 disas(logfile, s->code_gen_ptr, code_size); 1741 1742 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1743 if (sizeof(tcg_target_ulong) == 8) { 1744 fprintf(logfile, 1745 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1746 (uintptr_t)s->data_gen_ptr + i, 1747 *(uint64_t *)(s->data_gen_ptr + i)); 1748 } else { 1749 fprintf(logfile, 1750 "0x%08" PRIxPTR ": .long 0x%08x\n", 1751 (uintptr_t)s->data_gen_ptr + i, 1752 *(uint32_t *)(s->data_gen_ptr + i)); 1753 } 1754 } 1755 } else { 1756 disas(logfile, s->code_gen_ptr, prologue_size); 1757 } 1758 fprintf(logfile, "\n"); 1759 qemu_log_unlock(logfile); 1760 } 1761 } 1762 1763 #ifndef CONFIG_TCG_INTERPRETER 1764 /* 1765 * Assert that goto_ptr is implemented completely, setting an epilogue. 1766 * For tci, we use NULL as the signal to return from the interpreter, 1767 * so skip this check. 1768 */ 1769 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1770 #endif 1771 1772 tcg_region_prologue_set(s); 1773 } 1774 1775 void tcg_func_start(TCGContext *s) 1776 { 1777 tcg_pool_reset(s); 1778 s->nb_temps = s->nb_globals; 1779 1780 /* No temps have been previously allocated for size or locality. */ 1781 tcg_temp_ebb_reset_freed(s); 1782 1783 /* No constant temps have been previously allocated. 

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
2 : 1); 1873 indirect_reg = 1; 1874 break; 1875 default: 1876 g_assert_not_reached(); 1877 } 1878 1879 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1880 TCGTemp *ts2 = tcg_global_alloc(s); 1881 char buf[64]; 1882 1883 ts->base_type = TCG_TYPE_I64; 1884 ts->type = TCG_TYPE_I32; 1885 ts->indirect_reg = indirect_reg; 1886 ts->mem_allocated = 1; 1887 ts->mem_base = base_ts; 1888 ts->mem_offset = offset; 1889 pstrcpy(buf, sizeof(buf), name); 1890 pstrcat(buf, sizeof(buf), "_0"); 1891 ts->name = strdup(buf); 1892 1893 tcg_debug_assert(ts2 == ts + 1); 1894 ts2->base_type = TCG_TYPE_I64; 1895 ts2->type = TCG_TYPE_I32; 1896 ts2->indirect_reg = indirect_reg; 1897 ts2->mem_allocated = 1; 1898 ts2->mem_base = base_ts; 1899 ts2->mem_offset = offset + 4; 1900 ts2->temp_subindex = 1; 1901 pstrcpy(buf, sizeof(buf), name); 1902 pstrcat(buf, sizeof(buf), "_1"); 1903 ts2->name = strdup(buf); 1904 } else { 1905 ts->base_type = type; 1906 ts->type = type; 1907 ts->indirect_reg = indirect_reg; 1908 ts->mem_allocated = 1; 1909 ts->mem_base = base_ts; 1910 ts->mem_offset = offset; 1911 ts->name = name; 1912 } 1913 return ts; 1914 } 1915 1916 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1917 { 1918 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1919 return temp_tcgv_i32(ts); 1920 } 1921 1922 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1923 { 1924 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1925 return temp_tcgv_i64(ts); 1926 } 1927 1928 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1929 { 1930 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1931 return temp_tcgv_ptr(ts); 1932 } 1933 1934 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1935 { 1936 TCGContext *s = tcg_ctx; 1937 TCGTemp *ts; 1938 int n; 1939 1940 if (kind == TEMP_EBB) { 1941 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1942 1943 if (idx < TCG_MAX_TEMPS) { 1944 /* There is already an available temp with the right type. 
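   EBB temps are recycled through the per-type free_temps bitmaps:
   tcg_temp_free_internal() sets the bit for a freed temp's index, and we
   reuse that slot here instead of growing s->temps[].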
*/ 1945 clear_bit(idx, s->free_temps[type].l); 1946 1947 ts = &s->temps[idx]; 1948 ts->temp_allocated = 1; 1949 tcg_debug_assert(ts->base_type == type); 1950 tcg_debug_assert(ts->kind == kind); 1951 return ts; 1952 } 1953 } else { 1954 tcg_debug_assert(kind == TEMP_TB); 1955 } 1956 1957 switch (type) { 1958 case TCG_TYPE_I32: 1959 case TCG_TYPE_V64: 1960 case TCG_TYPE_V128: 1961 case TCG_TYPE_V256: 1962 n = 1; 1963 break; 1964 case TCG_TYPE_I64: 1965 n = 64 / TCG_TARGET_REG_BITS; 1966 break; 1967 case TCG_TYPE_I128: 1968 n = 128 / TCG_TARGET_REG_BITS; 1969 break; 1970 default: 1971 g_assert_not_reached(); 1972 } 1973 1974 ts = tcg_temp_alloc(s); 1975 ts->base_type = type; 1976 ts->temp_allocated = 1; 1977 ts->kind = kind; 1978 1979 if (n == 1) { 1980 ts->type = type; 1981 } else { 1982 ts->type = TCG_TYPE_REG; 1983 1984 for (int i = 1; i < n; ++i) { 1985 TCGTemp *ts2 = tcg_temp_alloc(s); 1986 1987 tcg_debug_assert(ts2 == ts + i); 1988 ts2->base_type = type; 1989 ts2->type = TCG_TYPE_REG; 1990 ts2->temp_allocated = 1; 1991 ts2->temp_subindex = i; 1992 ts2->kind = kind; 1993 } 1994 } 1995 return ts; 1996 } 1997 1998 TCGv_i32 tcg_temp_new_i32(void) 1999 { 2000 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 2001 } 2002 2003 TCGv_i32 tcg_temp_ebb_new_i32(void) 2004 { 2005 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 2006 } 2007 2008 TCGv_i64 tcg_temp_new_i64(void) 2009 { 2010 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 2011 } 2012 2013 TCGv_i64 tcg_temp_ebb_new_i64(void) 2014 { 2015 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 2016 } 2017 2018 TCGv_ptr tcg_temp_new_ptr(void) 2019 { 2020 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2021 } 2022 2023 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2024 { 2025 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2026 } 2027 2028 TCGv_i128 tcg_temp_new_i128(void) 2029 { 2030 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2031 } 2032 2033 TCGv_i128 tcg_temp_ebb_new_i128(void) 2034 { 2035 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2036 } 2037 2038 TCGv_vec tcg_temp_new_vec(TCGType type) 2039 { 2040 TCGTemp *t; 2041 2042 #ifdef CONFIG_DEBUG_TCG 2043 switch (type) { 2044 case TCG_TYPE_V64: 2045 assert(TCG_TARGET_HAS_v64); 2046 break; 2047 case TCG_TYPE_V128: 2048 assert(TCG_TARGET_HAS_v128); 2049 break; 2050 case TCG_TYPE_V256: 2051 assert(TCG_TARGET_HAS_v256); 2052 break; 2053 default: 2054 g_assert_not_reached(); 2055 } 2056 #endif 2057 2058 t = tcg_temp_new_internal(type, TEMP_EBB); 2059 return temp_tcgv_vec(t); 2060 } 2061 2062 /* Create a new temp of the same type as an existing temp. */ 2063 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2064 { 2065 TCGTemp *t = tcgv_vec_temp(match); 2066 2067 tcg_debug_assert(t->temp_allocated != 0); 2068 2069 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2070 return temp_tcgv_vec(t); 2071 } 2072 2073 void tcg_temp_free_internal(TCGTemp *ts) 2074 { 2075 TCGContext *s = tcg_ctx; 2076 2077 switch (ts->kind) { 2078 case TEMP_CONST: 2079 case TEMP_TB: 2080 /* Silently ignore free. */ 2081 break; 2082 case TEMP_EBB: 2083 tcg_debug_assert(ts->temp_allocated != 0); 2084 ts->temp_allocated = 0; 2085 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2086 break; 2087 default: 2088 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
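   Frees of TEMP_CONST and TEMP_TB are silently ignored above; both kinds
   are reclaimed wholesale when tcg_func_start() resets the context for the
   next translation, while TEMP_EBB temps go back onto the free_temps bitmap.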
*/ 2089 g_assert_not_reached(); 2090 } 2091 } 2092 2093 void tcg_temp_free_i32(TCGv_i32 arg) 2094 { 2095 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2096 } 2097 2098 void tcg_temp_free_i64(TCGv_i64 arg) 2099 { 2100 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2101 } 2102 2103 void tcg_temp_free_i128(TCGv_i128 arg) 2104 { 2105 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2106 } 2107 2108 void tcg_temp_free_ptr(TCGv_ptr arg) 2109 { 2110 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2111 } 2112 2113 void tcg_temp_free_vec(TCGv_vec arg) 2114 { 2115 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2116 } 2117 2118 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2119 { 2120 TCGContext *s = tcg_ctx; 2121 GHashTable *h = s->const_table[type]; 2122 TCGTemp *ts; 2123 2124 if (h == NULL) { 2125 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2126 s->const_table[type] = h; 2127 } 2128 2129 ts = g_hash_table_lookup(h, &val); 2130 if (ts == NULL) { 2131 int64_t *val_ptr; 2132 2133 ts = tcg_temp_alloc(s); 2134 2135 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2136 TCGTemp *ts2 = tcg_temp_alloc(s); 2137 2138 tcg_debug_assert(ts2 == ts + 1); 2139 2140 ts->base_type = TCG_TYPE_I64; 2141 ts->type = TCG_TYPE_I32; 2142 ts->kind = TEMP_CONST; 2143 ts->temp_allocated = 1; 2144 2145 ts2->base_type = TCG_TYPE_I64; 2146 ts2->type = TCG_TYPE_I32; 2147 ts2->kind = TEMP_CONST; 2148 ts2->temp_allocated = 1; 2149 ts2->temp_subindex = 1; 2150 2151 /* 2152 * Retain the full value of the 64-bit constant in the low 2153 * part, so that the hash table works. Actual uses will 2154 * truncate the value to the low part. 2155 */ 2156 ts[HOST_BIG_ENDIAN].val = val; 2157 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2158 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2159 } else { 2160 ts->base_type = type; 2161 ts->type = type; 2162 ts->kind = TEMP_CONST; 2163 ts->temp_allocated = 1; 2164 ts->val = val; 2165 val_ptr = &ts->val; 2166 } 2167 g_hash_table_insert(h, val_ptr, ts); 2168 } 2169 2170 return ts; 2171 } 2172 2173 TCGv_i32 tcg_constant_i32(int32_t val) 2174 { 2175 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2176 } 2177 2178 TCGv_i64 tcg_constant_i64(int64_t val) 2179 { 2180 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2181 } 2182 2183 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2184 { 2185 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2186 } 2187 2188 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2189 { 2190 val = dup_const(vece, val); 2191 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2192 } 2193 2194 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2195 { 2196 TCGTemp *t = tcgv_vec_temp(match); 2197 2198 tcg_debug_assert(t->temp_allocated != 0); 2199 return tcg_constant_vec(t->base_type, vece, val); 2200 } 2201 2202 #ifdef CONFIG_DEBUG_TCG 2203 size_t temp_idx(TCGTemp *ts) 2204 { 2205 ptrdiff_t n = ts - tcg_ctx->temps; 2206 assert(n >= 0 && n < tcg_ctx->nb_temps); 2207 return n; 2208 } 2209 2210 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2211 { 2212 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2213 2214 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2215 assert(o % sizeof(TCGTemp) == 0); 2216 2217 return (void *)tcg_ctx + (uintptr_t)v; 2218 } 2219 #endif /* CONFIG_DEBUG_TCG */ 2220 2221 /* 2222 * Return true if OP may appear in the opcode stream with TYPE. 2223 * Test the runtime variable that controls each opcode. 
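 * The test is in two parts: first whether the host supports TYPE at all
 * (e.g. TCG_TYPE_V256 requires TCG_TARGET_HAS_v256), then whether this
 * particular opcode is implemented for it, either via a per-opcode
 * TCG_TARGET_HAS_* define or, for opcodes converted to the TCGOutOp
 * scheme, via a constraint set other than C_NotImplemented.
 *
 * Illustrative caller (hypothetical, not from this file):
 *
 *     if (tcg_op_supported(INDEX_op_mul_vec, TCG_TYPE_V128, 0)) {
 *         ... emit the vector multiply directly ...
 *     } else {
 *         ... fall back to a scalar expansion ...
 *     }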
2224 */ 2225 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2226 { 2227 bool has_type; 2228 2229 switch (type) { 2230 case TCG_TYPE_I32: 2231 has_type = true; 2232 break; 2233 case TCG_TYPE_I64: 2234 has_type = TCG_TARGET_REG_BITS == 64; 2235 break; 2236 case TCG_TYPE_V64: 2237 has_type = TCG_TARGET_HAS_v64; 2238 break; 2239 case TCG_TYPE_V128: 2240 has_type = TCG_TARGET_HAS_v128; 2241 break; 2242 case TCG_TYPE_V256: 2243 has_type = TCG_TARGET_HAS_v256; 2244 break; 2245 default: 2246 has_type = false; 2247 break; 2248 } 2249 2250 switch (op) { 2251 case INDEX_op_discard: 2252 case INDEX_op_set_label: 2253 case INDEX_op_call: 2254 case INDEX_op_br: 2255 case INDEX_op_mb: 2256 case INDEX_op_insn_start: 2257 case INDEX_op_exit_tb: 2258 case INDEX_op_goto_tb: 2259 case INDEX_op_goto_ptr: 2260 case INDEX_op_qemu_ld_i32: 2261 case INDEX_op_qemu_st_i32: 2262 case INDEX_op_qemu_ld_i64: 2263 case INDEX_op_qemu_st_i64: 2264 return true; 2265 2266 case INDEX_op_qemu_st8_i32: 2267 return TCG_TARGET_HAS_qemu_st8_i32; 2268 2269 case INDEX_op_qemu_ld_i128: 2270 case INDEX_op_qemu_st_i128: 2271 return TCG_TARGET_HAS_qemu_ldst_i128; 2272 2273 case INDEX_op_add: 2274 case INDEX_op_and: 2275 case INDEX_op_mov: 2276 case INDEX_op_negsetcond: 2277 case INDEX_op_or: 2278 case INDEX_op_setcond: 2279 case INDEX_op_xor: 2280 return has_type; 2281 2282 case INDEX_op_brcond_i32: 2283 case INDEX_op_movcond_i32: 2284 case INDEX_op_ld8u_i32: 2285 case INDEX_op_ld8s_i32: 2286 case INDEX_op_ld16u_i32: 2287 case INDEX_op_ld16s_i32: 2288 case INDEX_op_ld_i32: 2289 case INDEX_op_st8_i32: 2290 case INDEX_op_st16_i32: 2291 case INDEX_op_st_i32: 2292 case INDEX_op_extract_i32: 2293 case INDEX_op_sextract_i32: 2294 case INDEX_op_deposit_i32: 2295 return true; 2296 2297 case INDEX_op_extract2_i32: 2298 return TCG_TARGET_HAS_extract2_i32; 2299 case INDEX_op_add2_i32: 2300 return TCG_TARGET_HAS_add2_i32; 2301 case INDEX_op_sub2_i32: 2302 return TCG_TARGET_HAS_sub2_i32; 2303 case INDEX_op_bswap16_i32: 2304 return TCG_TARGET_HAS_bswap16_i32; 2305 case INDEX_op_bswap32_i32: 2306 return TCG_TARGET_HAS_bswap32_i32; 2307 2308 case INDEX_op_brcond2_i32: 2309 case INDEX_op_setcond2_i32: 2310 return TCG_TARGET_REG_BITS == 32; 2311 2312 case INDEX_op_brcond_i64: 2313 case INDEX_op_movcond_i64: 2314 case INDEX_op_ld8u_i64: 2315 case INDEX_op_ld8s_i64: 2316 case INDEX_op_ld16u_i64: 2317 case INDEX_op_ld16s_i64: 2318 case INDEX_op_ld32u_i64: 2319 case INDEX_op_ld32s_i64: 2320 case INDEX_op_ld_i64: 2321 case INDEX_op_st8_i64: 2322 case INDEX_op_st16_i64: 2323 case INDEX_op_st32_i64: 2324 case INDEX_op_st_i64: 2325 case INDEX_op_ext_i32_i64: 2326 case INDEX_op_extu_i32_i64: 2327 case INDEX_op_extract_i64: 2328 case INDEX_op_sextract_i64: 2329 case INDEX_op_deposit_i64: 2330 return TCG_TARGET_REG_BITS == 64; 2331 2332 case INDEX_op_extract2_i64: 2333 return TCG_TARGET_HAS_extract2_i64; 2334 case INDEX_op_extrl_i64_i32: 2335 case INDEX_op_extrh_i64_i32: 2336 return TCG_TARGET_HAS_extr_i64_i32; 2337 case INDEX_op_bswap16_i64: 2338 return TCG_TARGET_HAS_bswap16_i64; 2339 case INDEX_op_bswap32_i64: 2340 return TCG_TARGET_HAS_bswap32_i64; 2341 case INDEX_op_bswap64_i64: 2342 return TCG_TARGET_HAS_bswap64_i64; 2343 case INDEX_op_add2_i64: 2344 return TCG_TARGET_HAS_add2_i64; 2345 case INDEX_op_sub2_i64: 2346 return TCG_TARGET_HAS_sub2_i64; 2347 2348 case INDEX_op_mov_vec: 2349 case INDEX_op_dup_vec: 2350 case INDEX_op_dupm_vec: 2351 case INDEX_op_ld_vec: 2352 case INDEX_op_st_vec: 2353 case INDEX_op_add_vec: 2354 case 
INDEX_op_sub_vec: 2355 case INDEX_op_and_vec: 2356 case INDEX_op_or_vec: 2357 case INDEX_op_xor_vec: 2358 case INDEX_op_cmp_vec: 2359 return has_type; 2360 case INDEX_op_dup2_vec: 2361 return has_type && TCG_TARGET_REG_BITS == 32; 2362 case INDEX_op_not_vec: 2363 return has_type && TCG_TARGET_HAS_not_vec; 2364 case INDEX_op_neg_vec: 2365 return has_type && TCG_TARGET_HAS_neg_vec; 2366 case INDEX_op_abs_vec: 2367 return has_type && TCG_TARGET_HAS_abs_vec; 2368 case INDEX_op_andc_vec: 2369 return has_type && TCG_TARGET_HAS_andc_vec; 2370 case INDEX_op_orc_vec: 2371 return has_type && TCG_TARGET_HAS_orc_vec; 2372 case INDEX_op_nand_vec: 2373 return has_type && TCG_TARGET_HAS_nand_vec; 2374 case INDEX_op_nor_vec: 2375 return has_type && TCG_TARGET_HAS_nor_vec; 2376 case INDEX_op_eqv_vec: 2377 return has_type && TCG_TARGET_HAS_eqv_vec; 2378 case INDEX_op_mul_vec: 2379 return has_type && TCG_TARGET_HAS_mul_vec; 2380 case INDEX_op_shli_vec: 2381 case INDEX_op_shri_vec: 2382 case INDEX_op_sari_vec: 2383 return has_type && TCG_TARGET_HAS_shi_vec; 2384 case INDEX_op_shls_vec: 2385 case INDEX_op_shrs_vec: 2386 case INDEX_op_sars_vec: 2387 return has_type && TCG_TARGET_HAS_shs_vec; 2388 case INDEX_op_shlv_vec: 2389 case INDEX_op_shrv_vec: 2390 case INDEX_op_sarv_vec: 2391 return has_type && TCG_TARGET_HAS_shv_vec; 2392 case INDEX_op_rotli_vec: 2393 return has_type && TCG_TARGET_HAS_roti_vec; 2394 case INDEX_op_rotls_vec: 2395 return has_type && TCG_TARGET_HAS_rots_vec; 2396 case INDEX_op_rotlv_vec: 2397 case INDEX_op_rotrv_vec: 2398 return has_type && TCG_TARGET_HAS_rotv_vec; 2399 case INDEX_op_ssadd_vec: 2400 case INDEX_op_usadd_vec: 2401 case INDEX_op_sssub_vec: 2402 case INDEX_op_ussub_vec: 2403 return has_type && TCG_TARGET_HAS_sat_vec; 2404 case INDEX_op_smin_vec: 2405 case INDEX_op_umin_vec: 2406 case INDEX_op_smax_vec: 2407 case INDEX_op_umax_vec: 2408 return has_type && TCG_TARGET_HAS_minmax_vec; 2409 case INDEX_op_bitsel_vec: 2410 return has_type && TCG_TARGET_HAS_bitsel_vec; 2411 case INDEX_op_cmpsel_vec: 2412 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2413 2414 default: 2415 if (op < INDEX_op_last_generic) { 2416 const TCGOutOp *outop; 2417 TCGConstraintSetIndex con_set; 2418 2419 if (!has_type) { 2420 return false; 2421 } 2422 2423 outop = all_outop[op]; 2424 tcg_debug_assert(outop != NULL); 2425 2426 con_set = outop->static_constraint; 2427 if (con_set == C_Dynamic) { 2428 con_set = outop->dynamic_constraint(type, flags); 2429 } 2430 if (con_set >= 0) { 2431 return true; 2432 } 2433 tcg_debug_assert(con_set == C_NotImplemented); 2434 return false; 2435 } 2436 tcg_debug_assert(op < NB_OPS); 2437 return true; 2438 2439 case INDEX_op_last_generic: 2440 g_assert_not_reached(); 2441 } 2442 } 2443 2444 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2445 { 2446 unsigned width; 2447 2448 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2449 width = (type == TCG_TYPE_I32 ? 
32 : 64); 2450 2451 tcg_debug_assert(ofs < width); 2452 tcg_debug_assert(len > 0); 2453 tcg_debug_assert(len <= width - ofs); 2454 2455 return TCG_TARGET_deposit_valid(type, ofs, len); 2456 } 2457 2458 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2459 2460 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2461 TCGTemp *ret, TCGTemp **args) 2462 { 2463 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2464 int n_extend = 0; 2465 TCGOp *op; 2466 int i, n, pi = 0, total_args; 2467 2468 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2469 init_call_layout(info); 2470 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2471 } 2472 2473 total_args = info->nr_out + info->nr_in + 2; 2474 op = tcg_op_alloc(INDEX_op_call, total_args); 2475 2476 #ifdef CONFIG_PLUGIN 2477 /* Flag helpers that may affect guest state */ 2478 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2479 tcg_ctx->plugin_insn->calls_helpers = true; 2480 } 2481 #endif 2482 2483 TCGOP_CALLO(op) = n = info->nr_out; 2484 switch (n) { 2485 case 0: 2486 tcg_debug_assert(ret == NULL); 2487 break; 2488 case 1: 2489 tcg_debug_assert(ret != NULL); 2490 op->args[pi++] = temp_arg(ret); 2491 break; 2492 case 2: 2493 case 4: 2494 tcg_debug_assert(ret != NULL); 2495 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2496 tcg_debug_assert(ret->temp_subindex == 0); 2497 for (i = 0; i < n; ++i) { 2498 op->args[pi++] = temp_arg(ret + i); 2499 } 2500 break; 2501 default: 2502 g_assert_not_reached(); 2503 } 2504 2505 TCGOP_CALLI(op) = n = info->nr_in; 2506 for (i = 0; i < n; i++) { 2507 const TCGCallArgumentLoc *loc = &info->in[i]; 2508 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2509 2510 switch (loc->kind) { 2511 case TCG_CALL_ARG_NORMAL: 2512 case TCG_CALL_ARG_BY_REF: 2513 case TCG_CALL_ARG_BY_REF_N: 2514 op->args[pi++] = temp_arg(ts); 2515 break; 2516 2517 case TCG_CALL_ARG_EXTEND_U: 2518 case TCG_CALL_ARG_EXTEND_S: 2519 { 2520 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2521 TCGv_i32 orig = temp_tcgv_i32(ts); 2522 2523 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2524 tcg_gen_ext_i32_i64(temp, orig); 2525 } else { 2526 tcg_gen_extu_i32_i64(temp, orig); 2527 } 2528 op->args[pi++] = tcgv_i64_arg(temp); 2529 extend_free[n_extend++] = temp; 2530 } 2531 break; 2532 2533 default: 2534 g_assert_not_reached(); 2535 } 2536 } 2537 op->args[pi++] = (uintptr_t)func; 2538 op->args[pi++] = (uintptr_t)info; 2539 tcg_debug_assert(pi == total_args); 2540 2541 if (tcg_ctx->emit_before_op) { 2542 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2543 } else { 2544 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2545 } 2546 2547 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2548 for (i = 0; i < n_extend; ++i) { 2549 tcg_temp_free_i64(extend_free[i]); 2550 } 2551 } 2552 2553 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2554 { 2555 tcg_gen_callN(func, info, ret, NULL); 2556 } 2557 2558 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2559 { 2560 tcg_gen_callN(func, info, ret, &t1); 2561 } 2562 2563 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2564 TCGTemp *t1, TCGTemp *t2) 2565 { 2566 TCGTemp *args[2] = { t1, t2 }; 2567 tcg_gen_callN(func, info, ret, args); 2568 } 2569 2570 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2571 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2572 { 2573 TCGTemp *args[3] = { t1, t2, t3 }; 2574 tcg_gen_callN(func, info, ret, args); 2575 } 2576 2577 void tcg_gen_call4(void 
*func, TCGHelperInfo *info, TCGTemp *ret, 2578 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2579 { 2580 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2581 tcg_gen_callN(func, info, ret, args); 2582 } 2583 2584 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2585 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2586 { 2587 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2588 tcg_gen_callN(func, info, ret, args); 2589 } 2590 2591 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2592 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2593 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2594 { 2595 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2596 tcg_gen_callN(func, info, ret, args); 2597 } 2598 2599 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2600 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2601 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2602 { 2603 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2604 tcg_gen_callN(func, info, ret, args); 2605 } 2606 2607 static void tcg_reg_alloc_start(TCGContext *s) 2608 { 2609 int i, n; 2610 2611 for (i = 0, n = s->nb_temps; i < n; i++) { 2612 TCGTemp *ts = &s->temps[i]; 2613 TCGTempVal val = TEMP_VAL_MEM; 2614 2615 switch (ts->kind) { 2616 case TEMP_CONST: 2617 val = TEMP_VAL_CONST; 2618 break; 2619 case TEMP_FIXED: 2620 val = TEMP_VAL_REG; 2621 break; 2622 case TEMP_GLOBAL: 2623 break; 2624 case TEMP_EBB: 2625 val = TEMP_VAL_DEAD; 2626 /* fall through */ 2627 case TEMP_TB: 2628 ts->mem_allocated = 0; 2629 break; 2630 default: 2631 g_assert_not_reached(); 2632 } 2633 ts->val_type = val; 2634 } 2635 2636 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2637 } 2638 2639 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2640 TCGTemp *ts) 2641 { 2642 int idx = temp_idx(ts); 2643 2644 switch (ts->kind) { 2645 case TEMP_FIXED: 2646 case TEMP_GLOBAL: 2647 pstrcpy(buf, buf_size, ts->name); 2648 break; 2649 case TEMP_TB: 2650 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2651 break; 2652 case TEMP_EBB: 2653 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2654 break; 2655 case TEMP_CONST: 2656 switch (ts->type) { 2657 case TCG_TYPE_I32: 2658 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2659 break; 2660 #if TCG_TARGET_REG_BITS > 32 2661 case TCG_TYPE_I64: 2662 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2663 break; 2664 #endif 2665 case TCG_TYPE_V64: 2666 case TCG_TYPE_V128: 2667 case TCG_TYPE_V256: 2668 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2669 64 << (ts->type - TCG_TYPE_V64), ts->val); 2670 break; 2671 default: 2672 g_assert_not_reached(); 2673 } 2674 break; 2675 } 2676 return buf; 2677 } 2678 2679 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2680 int buf_size, TCGArg arg) 2681 { 2682 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2683 } 2684 2685 static const char * const cond_name[] = 2686 { 2687 [TCG_COND_NEVER] = "never", 2688 [TCG_COND_ALWAYS] = "always", 2689 [TCG_COND_EQ] = "eq", 2690 [TCG_COND_NE] = "ne", 2691 [TCG_COND_LT] = "lt", 2692 [TCG_COND_GE] = "ge", 2693 [TCG_COND_LE] = "le", 2694 [TCG_COND_GT] = "gt", 2695 [TCG_COND_LTU] = "ltu", 2696 [TCG_COND_GEU] = "geu", 2697 [TCG_COND_LEU] = "leu", 2698 [TCG_COND_GTU] = "gtu", 2699 [TCG_COND_TSTEQ] = "tsteq", 2700 [TCG_COND_TSTNE] = "tstne", 2701 }; 2702 2703 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2704 { 2705 [MO_UB] = "ub", 2706 [MO_SB] = "sb", 2707 [MO_LEUW] = "leuw", 2708 [MO_LESW] = "lesw", 2709 [MO_LEUL] = "leul", 2710 [MO_LESL] = "lesl", 
2711 [MO_LEUQ] = "leq", 2712 [MO_BEUW] = "beuw", 2713 [MO_BESW] = "besw", 2714 [MO_BEUL] = "beul", 2715 [MO_BESL] = "besl", 2716 [MO_BEUQ] = "beq", 2717 [MO_128 + MO_BE] = "beo", 2718 [MO_128 + MO_LE] = "leo", 2719 }; 2720 2721 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2722 [MO_UNALN >> MO_ASHIFT] = "un+", 2723 [MO_ALIGN >> MO_ASHIFT] = "al+", 2724 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2725 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2726 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2727 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2728 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2729 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2730 }; 2731 2732 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2733 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2734 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2735 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2736 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2737 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2738 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2739 }; 2740 2741 static const char bswap_flag_name[][6] = { 2742 [TCG_BSWAP_IZ] = "iz", 2743 [TCG_BSWAP_OZ] = "oz", 2744 [TCG_BSWAP_OS] = "os", 2745 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2746 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2747 }; 2748 2749 #ifdef CONFIG_PLUGIN 2750 static const char * const plugin_from_name[] = { 2751 "from-tb", 2752 "from-insn", 2753 "after-insn", 2754 "after-tb", 2755 }; 2756 #endif 2757 2758 static inline bool tcg_regset_single(TCGRegSet d) 2759 { 2760 return (d & (d - 1)) == 0; 2761 } 2762 2763 static inline TCGReg tcg_regset_first(TCGRegSet d) 2764 { 2765 if (TCG_TARGET_NB_REGS <= 32) { 2766 return ctz32(d); 2767 } else { 2768 return ctz64(d); 2769 } 2770 } 2771 2772 /* Return only the number of characters output -- no error return. */ 2773 #define ne_fprintf(...) \ 2774 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2775 2776 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2777 { 2778 char buf[128]; 2779 TCGOp *op; 2780 2781 QTAILQ_FOREACH(op, &s->ops, link) { 2782 int i, k, nb_oargs, nb_iargs, nb_cargs; 2783 const TCGOpDef *def; 2784 TCGOpcode c; 2785 int col = 0; 2786 2787 c = op->opc; 2788 def = &tcg_op_defs[c]; 2789 2790 if (c == INDEX_op_insn_start) { 2791 nb_oargs = 0; 2792 col += ne_fprintf(f, "\n ----"); 2793 2794 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2795 col += ne_fprintf(f, " %016" PRIx64, 2796 tcg_get_insn_start_param(op, i)); 2797 } 2798 } else if (c == INDEX_op_call) { 2799 const TCGHelperInfo *info = tcg_call_info(op); 2800 void *func = tcg_call_func(op); 2801 2802 /* variable number of arguments */ 2803 nb_oargs = TCGOP_CALLO(op); 2804 nb_iargs = TCGOP_CALLI(op); 2805 nb_cargs = def->nb_cargs; 2806 2807 col += ne_fprintf(f, " %s ", def->name); 2808 2809 /* 2810 * Print the function name from TCGHelperInfo, if available. 2811 * Note that plugins have a template function for the info, 2812 * but the actual function pointer comes from the plugin. 
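 * In that case func != info->func, and the dump falls back to printing
 * the raw pointer as "plugin(%p)" instead of a symbolic helper name.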
2813 */ 2814 if (func == info->func) { 2815 col += ne_fprintf(f, "%s", info->name); 2816 } else { 2817 col += ne_fprintf(f, "plugin(%p)", func); 2818 } 2819 2820 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2821 for (i = 0; i < nb_oargs; i++) { 2822 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2823 op->args[i])); 2824 } 2825 for (i = 0; i < nb_iargs; i++) { 2826 TCGArg arg = op->args[nb_oargs + i]; 2827 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2828 col += ne_fprintf(f, ",%s", t); 2829 } 2830 } else { 2831 if (def->flags & TCG_OPF_INT) { 2832 col += ne_fprintf(f, " %s_i%d ", 2833 def->name, 2834 8 * tcg_type_size(TCGOP_TYPE(op))); 2835 } else if (def->flags & TCG_OPF_VECTOR) { 2836 col += ne_fprintf(f, "%s v%d,e%d,", 2837 def->name, 2838 8 * tcg_type_size(TCGOP_TYPE(op)), 2839 8 << TCGOP_VECE(op)); 2840 } else { 2841 col += ne_fprintf(f, " %s ", def->name); 2842 } 2843 2844 nb_oargs = def->nb_oargs; 2845 nb_iargs = def->nb_iargs; 2846 nb_cargs = def->nb_cargs; 2847 2848 k = 0; 2849 for (i = 0; i < nb_oargs; i++) { 2850 const char *sep = k ? "," : ""; 2851 col += ne_fprintf(f, "%s%s", sep, 2852 tcg_get_arg_str(s, buf, sizeof(buf), 2853 op->args[k++])); 2854 } 2855 for (i = 0; i < nb_iargs; i++) { 2856 const char *sep = k ? "," : ""; 2857 col += ne_fprintf(f, "%s%s", sep, 2858 tcg_get_arg_str(s, buf, sizeof(buf), 2859 op->args[k++])); 2860 } 2861 switch (c) { 2862 case INDEX_op_brcond_i32: 2863 case INDEX_op_setcond: 2864 case INDEX_op_negsetcond: 2865 case INDEX_op_movcond_i32: 2866 case INDEX_op_brcond2_i32: 2867 case INDEX_op_setcond2_i32: 2868 case INDEX_op_brcond_i64: 2869 case INDEX_op_movcond_i64: 2870 case INDEX_op_cmp_vec: 2871 case INDEX_op_cmpsel_vec: 2872 if (op->args[k] < ARRAY_SIZE(cond_name) 2873 && cond_name[op->args[k]]) { 2874 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2875 } else { 2876 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2877 } 2878 i = 1; 2879 break; 2880 case INDEX_op_qemu_ld_i32: 2881 case INDEX_op_qemu_st_i32: 2882 case INDEX_op_qemu_st8_i32: 2883 case INDEX_op_qemu_ld_i64: 2884 case INDEX_op_qemu_st_i64: 2885 case INDEX_op_qemu_ld_i128: 2886 case INDEX_op_qemu_st_i128: 2887 { 2888 const char *s_al, *s_op, *s_at; 2889 MemOpIdx oi = op->args[k++]; 2890 MemOp mop = get_memop(oi); 2891 unsigned ix = get_mmuidx(oi); 2892 2893 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2894 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2895 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2896 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2897 2898 /* If all fields are accounted for, print symbolically. 
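   With every field decoded, an aligned little-endian 32-bit load in mmu
   index 0 prints as ",al+leul,0"; any bits not covered by the named
   tables fall back to the raw hex MemOpIdx form below.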
*/ 2899 if (!mop && s_al && s_op && s_at) { 2900 col += ne_fprintf(f, ",%s%s%s,%u", 2901 s_at, s_al, s_op, ix); 2902 } else { 2903 mop = get_memop(oi); 2904 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2905 } 2906 i = 1; 2907 } 2908 break; 2909 case INDEX_op_bswap16_i32: 2910 case INDEX_op_bswap16_i64: 2911 case INDEX_op_bswap32_i32: 2912 case INDEX_op_bswap32_i64: 2913 case INDEX_op_bswap64_i64: 2914 { 2915 TCGArg flags = op->args[k]; 2916 const char *name = NULL; 2917 2918 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2919 name = bswap_flag_name[flags]; 2920 } 2921 if (name) { 2922 col += ne_fprintf(f, ",%s", name); 2923 } else { 2924 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2925 } 2926 i = k = 1; 2927 } 2928 break; 2929 #ifdef CONFIG_PLUGIN 2930 case INDEX_op_plugin_cb: 2931 { 2932 TCGArg from = op->args[k++]; 2933 const char *name = NULL; 2934 2935 if (from < ARRAY_SIZE(plugin_from_name)) { 2936 name = plugin_from_name[from]; 2937 } 2938 if (name) { 2939 col += ne_fprintf(f, "%s", name); 2940 } else { 2941 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2942 } 2943 i = 1; 2944 } 2945 break; 2946 #endif 2947 default: 2948 i = 0; 2949 break; 2950 } 2951 switch (c) { 2952 case INDEX_op_set_label: 2953 case INDEX_op_br: 2954 case INDEX_op_brcond_i32: 2955 case INDEX_op_brcond_i64: 2956 case INDEX_op_brcond2_i32: 2957 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2958 arg_label(op->args[k])->id); 2959 i++, k++; 2960 break; 2961 case INDEX_op_mb: 2962 { 2963 TCGBar membar = op->args[k]; 2964 const char *b_op, *m_op; 2965 2966 switch (membar & TCG_BAR_SC) { 2967 case 0: 2968 b_op = "none"; 2969 break; 2970 case TCG_BAR_LDAQ: 2971 b_op = "acq"; 2972 break; 2973 case TCG_BAR_STRL: 2974 b_op = "rel"; 2975 break; 2976 case TCG_BAR_SC: 2977 b_op = "seq"; 2978 break; 2979 default: 2980 g_assert_not_reached(); 2981 } 2982 2983 switch (membar & TCG_MO_ALL) { 2984 case 0: 2985 m_op = "none"; 2986 break; 2987 case TCG_MO_LD_LD: 2988 m_op = "rr"; 2989 break; 2990 case TCG_MO_LD_ST: 2991 m_op = "rw"; 2992 break; 2993 case TCG_MO_ST_LD: 2994 m_op = "wr"; 2995 break; 2996 case TCG_MO_ST_ST: 2997 m_op = "ww"; 2998 break; 2999 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3000 m_op = "rr+rw"; 3001 break; 3002 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3003 m_op = "rr+wr"; 3004 break; 3005 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3006 m_op = "rr+ww"; 3007 break; 3008 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3009 m_op = "rw+wr"; 3010 break; 3011 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3012 m_op = "rw+ww"; 3013 break; 3014 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3015 m_op = "wr+ww"; 3016 break; 3017 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3018 m_op = "rr+rw+wr"; 3019 break; 3020 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3021 m_op = "rr+rw+ww"; 3022 break; 3023 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3024 m_op = "rr+wr+ww"; 3025 break; 3026 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3027 m_op = "rw+wr+ww"; 3028 break; 3029 case TCG_MO_ALL: 3030 m_op = "all"; 3031 break; 3032 default: 3033 g_assert_not_reached(); 3034 } 3035 3036 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3037 i++, k++; 3038 } 3039 break; 3040 default: 3041 break; 3042 } 3043 for (; i < nb_cargs; i++, k++) { 3044 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3045 op->args[k]); 3046 } 3047 } 3048 3049 if (have_prefs || op->life) { 3050 for (; col < 40; ++col) { 3051 putc(' ', f); 3052 } 3053 } 3054 3055 if (op->life) { 3056 unsigned life = op->life; 3057 3058 if (life & (SYNC_ARG * 3)) { 3059 ne_fprintf(f, " sync:"); 3060 for (i = 0; i < 2; ++i) { 3061 if (life & (SYNC_ARG << i)) { 3062 ne_fprintf(f, " %d", i); 3063 } 3064 } 3065 } 3066 life /= DEAD_ARG; 3067 if (life) { 3068 ne_fprintf(f, " dead:"); 3069 for (i = 0; life; ++i, life >>= 1) { 3070 if (life & 1) { 3071 ne_fprintf(f, " %d", i); 3072 } 3073 } 3074 } 3075 } 3076 3077 if (have_prefs) { 3078 for (i = 0; i < nb_oargs; ++i) { 3079 TCGRegSet set = output_pref(op, i); 3080 3081 if (i == 0) { 3082 ne_fprintf(f, " pref="); 3083 } else { 3084 ne_fprintf(f, ","); 3085 } 3086 if (set == 0) { 3087 ne_fprintf(f, "none"); 3088 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3089 ne_fprintf(f, "all"); 3090 #ifdef CONFIG_DEBUG_TCG 3091 } else if (tcg_regset_single(set)) { 3092 TCGReg reg = tcg_regset_first(set); 3093 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3094 #endif 3095 } else if (TCG_TARGET_NB_REGS <= 32) { 3096 ne_fprintf(f, "0x%x", (uint32_t)set); 3097 } else { 3098 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3099 } 3100 } 3101 } 3102 3103 putc('\n', f); 3104 } 3105 } 3106 3107 /* we give more priority to constraints with less registers */ 3108 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3109 { 3110 int n; 3111 3112 arg_ct += k; 3113 n = ctpop64(arg_ct->regs); 3114 3115 /* 3116 * Sort constraints of a single register first, which includes output 3117 * aliases (which must exactly match the input already allocated). 3118 */ 3119 if (n == 1 || arg_ct->oalias) { 3120 return INT_MAX; 3121 } 3122 3123 /* 3124 * Sort register pairs next, first then second immediately after. 3125 * Arbitrarily sort multiple pairs by the index of the first reg; 3126 * there shouldn't be many pairs. 3127 */ 3128 switch (arg_ct->pair) { 3129 case 1: 3130 case 3: 3131 return (k + 1) * 2; 3132 case 2: 3133 return (arg_ct->pair_index + 1) * 2 - 1; 3134 } 3135 3136 /* Finally, sort by decreasing register count. 
*/ 3137 assert(n > 1); 3138 return -n; 3139 } 3140 3141 /* sort from highest priority to lowest */ 3142 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3143 { 3144 int i, j; 3145 3146 for (i = 0; i < n; i++) { 3147 a[start + i].sort_index = start + i; 3148 } 3149 if (n <= 1) { 3150 return; 3151 } 3152 for (i = 0; i < n - 1; i++) { 3153 for (j = i + 1; j < n; j++) { 3154 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3155 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3156 if (p1 < p2) { 3157 int tmp = a[start + i].sort_index; 3158 a[start + i].sort_index = a[start + j].sort_index; 3159 a[start + j].sort_index = tmp; 3160 } 3161 } 3162 } 3163 } 3164 3165 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3166 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3167 3168 static void process_constraint_sets(void) 3169 { 3170 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3171 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3172 TCGArgConstraint *args_ct = all_cts[c]; 3173 int nb_oargs = tdefs->nb_oargs; 3174 int nb_iargs = tdefs->nb_iargs; 3175 int nb_args = nb_oargs + nb_iargs; 3176 bool saw_alias_pair = false; 3177 3178 for (int i = 0; i < nb_args; i++) { 3179 const char *ct_str = tdefs->args_ct_str[i]; 3180 bool input_p = i >= nb_oargs; 3181 int o; 3182 3183 switch (*ct_str) { 3184 case '0' ... '9': 3185 o = *ct_str - '0'; 3186 tcg_debug_assert(input_p); 3187 tcg_debug_assert(o < nb_oargs); 3188 tcg_debug_assert(args_ct[o].regs != 0); 3189 tcg_debug_assert(!args_ct[o].oalias); 3190 args_ct[i] = args_ct[o]; 3191 /* The output sets oalias. */ 3192 args_ct[o].oalias = 1; 3193 args_ct[o].alias_index = i; 3194 /* The input sets ialias. */ 3195 args_ct[i].ialias = 1; 3196 args_ct[i].alias_index = o; 3197 if (args_ct[i].pair) { 3198 saw_alias_pair = true; 3199 } 3200 tcg_debug_assert(ct_str[1] == '\0'); 3201 continue; 3202 3203 case '&': 3204 tcg_debug_assert(!input_p); 3205 args_ct[i].newreg = true; 3206 ct_str++; 3207 break; 3208 3209 case 'p': /* plus */ 3210 /* Allocate to the register after the previous. */ 3211 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3212 o = i - 1; 3213 tcg_debug_assert(!args_ct[o].pair); 3214 tcg_debug_assert(!args_ct[o].ct); 3215 args_ct[i] = (TCGArgConstraint){ 3216 .pair = 2, 3217 .pair_index = o, 3218 .regs = args_ct[o].regs << 1, 3219 .newreg = args_ct[o].newreg, 3220 }; 3221 args_ct[o].pair = 1; 3222 args_ct[o].pair_index = i; 3223 tcg_debug_assert(ct_str[1] == '\0'); 3224 continue; 3225 3226 case 'm': /* minus */ 3227 /* Allocate to the register before the previous. */ 3228 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3229 o = i - 1; 3230 tcg_debug_assert(!args_ct[o].pair); 3231 tcg_debug_assert(!args_ct[o].ct); 3232 args_ct[i] = (TCGArgConstraint){ 3233 .pair = 1, 3234 .pair_index = o, 3235 .regs = args_ct[o].regs >> 1, 3236 .newreg = args_ct[o].newreg, 3237 }; 3238 args_ct[o].pair = 2; 3239 args_ct[o].pair_index = i; 3240 tcg_debug_assert(ct_str[1] == '\0'); 3241 continue; 3242 } 3243 3244 do { 3245 switch (*ct_str) { 3246 case 'i': 3247 args_ct[i].ct |= TCG_CT_CONST; 3248 break; 3249 #ifdef TCG_REG_ZERO 3250 case 'z': 3251 args_ct[i].ct |= TCG_CT_REG_ZERO; 3252 break; 3253 #endif 3254 3255 /* Include all of the target-specific constraints. 
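   Each remaining single letter maps either to a constant flag (CONST) or
   to a register set (REGS); the per-backend tcg-target-con-str.h included
   just below expands to one case per letter via those two macros.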
*/ 3256 3257 #undef CONST 3258 #define CONST(CASE, MASK) \ 3259 case CASE: args_ct[i].ct |= MASK; break; 3260 #define REGS(CASE, MASK) \ 3261 case CASE: args_ct[i].regs |= MASK; break; 3262 3263 #include "tcg-target-con-str.h" 3264 3265 #undef REGS 3266 #undef CONST 3267 default: 3268 case '0' ... '9': 3269 case '&': 3270 case 'p': 3271 case 'm': 3272 /* Typo in TCGConstraintSet constraint. */ 3273 g_assert_not_reached(); 3274 } 3275 } while (*++ct_str != '\0'); 3276 } 3277 3278 /* 3279 * Fix up output pairs that are aliased with inputs. 3280 * When we created the alias, we copied pair from the output. 3281 * There are three cases: 3282 * (1a) Pairs of inputs alias pairs of outputs. 3283 * (1b) One input aliases the first of a pair of outputs. 3284 * (2) One input aliases the second of a pair of outputs. 3285 * 3286 * Case 1a is handled by making sure that the pair_index'es are 3287 * properly updated so that they appear the same as a pair of inputs. 3288 * 3289 * Case 1b is handled by setting the pair_index of the input to 3290 * itself, simply so it doesn't point to an unrelated argument. 3291 * Since we don't encounter the "second" during the input allocation 3292 * phase, nothing happens with the second half of the input pair. 3293 * 3294 * Case 2 is handled by setting the second input to pair=3, the 3295 * first output to pair=3, and the pair_index'es to match. 3296 */ 3297 if (saw_alias_pair) { 3298 for (int i = nb_oargs; i < nb_args; i++) { 3299 int o, o2, i2; 3300 3301 /* 3302 * Since [0-9pm] must be alone in the constraint string, 3303 * the only way they can both be set is if the pair comes 3304 * from the output alias. 3305 */ 3306 if (!args_ct[i].ialias) { 3307 continue; 3308 } 3309 switch (args_ct[i].pair) { 3310 case 0: 3311 break; 3312 case 1: 3313 o = args_ct[i].alias_index; 3314 o2 = args_ct[o].pair_index; 3315 tcg_debug_assert(args_ct[o].pair == 1); 3316 tcg_debug_assert(args_ct[o2].pair == 2); 3317 if (args_ct[o2].oalias) { 3318 /* Case 1a */ 3319 i2 = args_ct[o2].alias_index; 3320 tcg_debug_assert(args_ct[i2].pair == 2); 3321 args_ct[i2].pair_index = i; 3322 args_ct[i].pair_index = i2; 3323 } else { 3324 /* Case 1b */ 3325 args_ct[i].pair_index = i; 3326 } 3327 break; 3328 case 2: 3329 o = args_ct[i].alias_index; 3330 o2 = args_ct[o].pair_index; 3331 tcg_debug_assert(args_ct[o].pair == 2); 3332 tcg_debug_assert(args_ct[o2].pair == 1); 3333 if (args_ct[o2].oalias) { 3334 /* Case 1a */ 3335 i2 = args_ct[o2].alias_index; 3336 tcg_debug_assert(args_ct[i2].pair == 1); 3337 args_ct[i2].pair_index = i; 3338 args_ct[i].pair_index = i2; 3339 } else { 3340 /* Case 2 */ 3341 args_ct[i].pair = 3; 3342 args_ct[o2].pair = 3; 3343 args_ct[i].pair_index = o2; 3344 args_ct[o2].pair_index = i; 3345 } 3346 break; 3347 default: 3348 g_assert_not_reached(); 3349 } 3350 } 3351 } 3352 3353 /* sort the constraints (XXX: this is just an heuristic) */ 3354 sort_constraints(args_ct, 0, nb_oargs); 3355 sort_constraints(args_ct, nb_oargs, nb_iargs); 3356 } 3357 } 3358 3359 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3360 { 3361 TCGOpcode opc = op->opc; 3362 TCGType type = TCGOP_TYPE(op); 3363 unsigned flags = TCGOP_FLAGS(op); 3364 const TCGOpDef *def = &tcg_op_defs[opc]; 3365 const TCGOutOp *outop = all_outop[opc]; 3366 TCGConstraintSetIndex con_set; 3367 3368 if (def->flags & TCG_OPF_NOT_PRESENT) { 3369 return empty_cts; 3370 } 3371 3372 if (outop) { 3373 con_set = outop->static_constraint; 3374 if (con_set == C_Dynamic) { 3375 con_set = outop->dynamic_constraint(type, flags); 
3376 } 3377 } else { 3378 con_set = tcg_target_op_def(opc, type, flags); 3379 } 3380 tcg_debug_assert(con_set >= 0); 3381 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3382 3383 /* The constraint arguments must match TCGOpcode arguments. */ 3384 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3385 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3386 3387 return all_cts[con_set]; 3388 } 3389 3390 static void remove_label_use(TCGOp *op, int idx) 3391 { 3392 TCGLabel *label = arg_label(op->args[idx]); 3393 TCGLabelUse *use; 3394 3395 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3396 if (use->op == op) { 3397 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3398 return; 3399 } 3400 } 3401 g_assert_not_reached(); 3402 } 3403 3404 void tcg_op_remove(TCGContext *s, TCGOp *op) 3405 { 3406 switch (op->opc) { 3407 case INDEX_op_br: 3408 remove_label_use(op, 0); 3409 break; 3410 case INDEX_op_brcond_i32: 3411 case INDEX_op_brcond_i64: 3412 remove_label_use(op, 3); 3413 break; 3414 case INDEX_op_brcond2_i32: 3415 remove_label_use(op, 5); 3416 break; 3417 default: 3418 break; 3419 } 3420 3421 QTAILQ_REMOVE(&s->ops, op, link); 3422 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3423 s->nb_ops--; 3424 } 3425 3426 void tcg_remove_ops_after(TCGOp *op) 3427 { 3428 TCGContext *s = tcg_ctx; 3429 3430 while (true) { 3431 TCGOp *last = tcg_last_op(); 3432 if (last == op) { 3433 return; 3434 } 3435 tcg_op_remove(s, last); 3436 } 3437 } 3438 3439 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3440 { 3441 TCGContext *s = tcg_ctx; 3442 TCGOp *op = NULL; 3443 3444 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3445 QTAILQ_FOREACH(op, &s->free_ops, link) { 3446 if (nargs <= op->nargs) { 3447 QTAILQ_REMOVE(&s->free_ops, op, link); 3448 nargs = op->nargs; 3449 goto found; 3450 } 3451 } 3452 } 3453 3454 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3455 nargs = MAX(4, nargs); 3456 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3457 3458 found: 3459 memset(op, 0, offsetof(TCGOp, link)); 3460 op->opc = opc; 3461 op->nargs = nargs; 3462 3463 /* Check for bitfield overflow. 
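   op->nargs is a narrow bitfield, so assert that the value we just stored
   reads back unchanged.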
*/ 3464 tcg_debug_assert(op->nargs == nargs); 3465 3466 s->nb_ops++; 3467 return op; 3468 } 3469 3470 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3471 { 3472 TCGOp *op = tcg_op_alloc(opc, nargs); 3473 3474 if (tcg_ctx->emit_before_op) { 3475 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3476 } else { 3477 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3478 } 3479 return op; 3480 } 3481 3482 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3483 TCGOpcode opc, TCGType type, unsigned nargs) 3484 { 3485 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3486 3487 TCGOP_TYPE(new_op) = type; 3488 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3489 return new_op; 3490 } 3491 3492 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3493 TCGOpcode opc, TCGType type, unsigned nargs) 3494 { 3495 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3496 3497 TCGOP_TYPE(new_op) = type; 3498 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3499 return new_op; 3500 } 3501 3502 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3503 { 3504 TCGLabelUse *u; 3505 3506 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3507 TCGOp *op = u->op; 3508 switch (op->opc) { 3509 case INDEX_op_br: 3510 op->args[0] = label_arg(to); 3511 break; 3512 case INDEX_op_brcond_i32: 3513 case INDEX_op_brcond_i64: 3514 op->args[3] = label_arg(to); 3515 break; 3516 case INDEX_op_brcond2_i32: 3517 op->args[5] = label_arg(to); 3518 break; 3519 default: 3520 g_assert_not_reached(); 3521 } 3522 } 3523 3524 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3525 } 3526 3527 /* Reachable analysis : remove unreachable code. */ 3528 static void __attribute__((noinline)) 3529 reachable_code_pass(TCGContext *s) 3530 { 3531 TCGOp *op, *op_next, *op_prev; 3532 bool dead = false; 3533 3534 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3535 bool remove = dead; 3536 TCGLabel *label; 3537 3538 switch (op->opc) { 3539 case INDEX_op_set_label: 3540 label = arg_label(op->args[0]); 3541 3542 /* 3543 * Note that the first op in the TB is always a load, 3544 * so there is always something before a label. 3545 */ 3546 op_prev = QTAILQ_PREV(op, link); 3547 3548 /* 3549 * If we find two sequential labels, move all branches to 3550 * reference the second label and remove the first label. 3551 * Do this before branch to next optimization, so that the 3552 * middle label is out of the way. 3553 */ 3554 if (op_prev->opc == INDEX_op_set_label) { 3555 move_label_uses(label, arg_label(op_prev->args[0])); 3556 tcg_op_remove(s, op_prev); 3557 op_prev = QTAILQ_PREV(op, link); 3558 } 3559 3560 /* 3561 * Optimization can fold conditional branches to unconditional. 3562 * If we find a label which is preceded by an unconditional 3563 * branch to next, remove the branch. We couldn't do this when 3564 * processing the branch because any dead code between the branch 3565 * and label had not yet been removed. 3566 */ 3567 if (op_prev->opc == INDEX_op_br && 3568 label == arg_label(op_prev->args[0])) { 3569 tcg_op_remove(s, op_prev); 3570 /* Fall through means insns become live again. */ 3571 dead = false; 3572 } 3573 3574 if (QSIMPLEQ_EMPTY(&label->branches)) { 3575 /* 3576 * While there is an occasional backward branch, virtually 3577 * all branches generated by the translators are forward. 3578 * Which means that generally we will have already removed 3579 * all references to the label that will be, and there is 3580 * little to be gained by iterating. 3581 */ 3582 remove = true; 3583 } else { 3584 /* Once we see a label, insns become live again. 
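   The label still has at least one branch referencing it, so control can
   reach the code that follows even if we were in a dead region.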
*/ 3585 dead = false; 3586 remove = false; 3587 } 3588 break; 3589 3590 case INDEX_op_br: 3591 case INDEX_op_exit_tb: 3592 case INDEX_op_goto_ptr: 3593 /* Unconditional branches; everything following is dead. */ 3594 dead = true; 3595 break; 3596 3597 case INDEX_op_call: 3598 /* Notice noreturn helper calls, raising exceptions. */ 3599 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3600 dead = true; 3601 } 3602 break; 3603 3604 case INDEX_op_insn_start: 3605 /* Never remove -- we need to keep these for unwind. */ 3606 remove = false; 3607 break; 3608 3609 default: 3610 break; 3611 } 3612 3613 if (remove) { 3614 tcg_op_remove(s, op); 3615 } 3616 } 3617 } 3618 3619 #define TS_DEAD 1 3620 #define TS_MEM 2 3621 3622 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3623 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3624 3625 /* For liveness_pass_1, the register preferences for a given temp. */ 3626 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3627 { 3628 return ts->state_ptr; 3629 } 3630 3631 /* For liveness_pass_1, reset the preferences for a given temp to the 3632 * maximal regset for its type. 3633 */ 3634 static inline void la_reset_pref(TCGTemp *ts) 3635 { 3636 *la_temp_pref(ts) 3637 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3638 } 3639 3640 /* liveness analysis: end of function: all temps are dead, and globals 3641 should be in memory. */ 3642 static void la_func_end(TCGContext *s, int ng, int nt) 3643 { 3644 int i; 3645 3646 for (i = 0; i < ng; ++i) { 3647 s->temps[i].state = TS_DEAD | TS_MEM; 3648 la_reset_pref(&s->temps[i]); 3649 } 3650 for (i = ng; i < nt; ++i) { 3651 s->temps[i].state = TS_DEAD; 3652 la_reset_pref(&s->temps[i]); 3653 } 3654 } 3655 3656 /* liveness analysis: end of basic block: all temps are dead, globals 3657 and local temps should be in memory. */ 3658 static void la_bb_end(TCGContext *s, int ng, int nt) 3659 { 3660 int i; 3661 3662 for (i = 0; i < nt; ++i) { 3663 TCGTemp *ts = &s->temps[i]; 3664 int state; 3665 3666 switch (ts->kind) { 3667 case TEMP_FIXED: 3668 case TEMP_GLOBAL: 3669 case TEMP_TB: 3670 state = TS_DEAD | TS_MEM; 3671 break; 3672 case TEMP_EBB: 3673 case TEMP_CONST: 3674 state = TS_DEAD; 3675 break; 3676 default: 3677 g_assert_not_reached(); 3678 } 3679 ts->state = state; 3680 la_reset_pref(ts); 3681 } 3682 } 3683 3684 /* liveness analysis: sync globals back to memory. */ 3685 static void la_global_sync(TCGContext *s, int ng) 3686 { 3687 int i; 3688 3689 for (i = 0; i < ng; ++i) { 3690 int state = s->temps[i].state; 3691 s->temps[i].state = state | TS_MEM; 3692 if (state == TS_DEAD) { 3693 /* If the global was previously dead, reset prefs. */ 3694 la_reset_pref(&s->temps[i]); 3695 } 3696 } 3697 } 3698 3699 /* 3700 * liveness analysis: conditional branch: all temps are dead unless 3701 * explicitly live-across-conditional-branch, globals and local temps 3702 * should be synced. 3703 */ 3704 static void la_bb_sync(TCGContext *s, int ng, int nt) 3705 { 3706 la_global_sync(s, ng); 3707 3708 for (int i = ng; i < nt; ++i) { 3709 TCGTemp *ts = &s->temps[i]; 3710 int state; 3711 3712 switch (ts->kind) { 3713 case TEMP_TB: 3714 state = ts->state; 3715 ts->state = state | TS_MEM; 3716 if (state != TS_DEAD) { 3717 continue; 3718 } 3719 break; 3720 case TEMP_EBB: 3721 case TEMP_CONST: 3722 continue; 3723 default: 3724 g_assert_not_reached(); 3725 } 3726 la_reset_pref(&s->temps[i]); 3727 } 3728 } 3729 3730 /* liveness analysis: sync globals back to memory and kill. 
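   This is used for helper calls that may both read and write guest
   globals (see liveness_pass_1): every global is marked TS_DEAD | TS_MEM,
   the same state as at the end of the function.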
*/ 3731 static void la_global_kill(TCGContext *s, int ng) 3732 { 3733 int i; 3734 3735 for (i = 0; i < ng; i++) { 3736 s->temps[i].state = TS_DEAD | TS_MEM; 3737 la_reset_pref(&s->temps[i]); 3738 } 3739 } 3740 3741 /* liveness analysis: note live globals crossing calls. */ 3742 static void la_cross_call(TCGContext *s, int nt) 3743 { 3744 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3745 int i; 3746 3747 for (i = 0; i < nt; i++) { 3748 TCGTemp *ts = &s->temps[i]; 3749 if (!(ts->state & TS_DEAD)) { 3750 TCGRegSet *pset = la_temp_pref(ts); 3751 TCGRegSet set = *pset; 3752 3753 set &= mask; 3754 /* If the combination is not possible, restart. */ 3755 if (set == 0) { 3756 set = tcg_target_available_regs[ts->type] & mask; 3757 } 3758 *pset = set; 3759 } 3760 } 3761 } 3762 3763 /* 3764 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3765 * to TEMP_EBB, if possible. 3766 */ 3767 static void __attribute__((noinline)) 3768 liveness_pass_0(TCGContext *s) 3769 { 3770 void * const multiple_ebb = (void *)(uintptr_t)-1; 3771 int nb_temps = s->nb_temps; 3772 TCGOp *op, *ebb; 3773 3774 for (int i = s->nb_globals; i < nb_temps; ++i) { 3775 s->temps[i].state_ptr = NULL; 3776 } 3777 3778 /* 3779 * Represent each EBB by the op at which it begins. In the case of 3780 * the first EBB, this is the first op, otherwise it is a label. 3781 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3782 * within a single EBB, else MULTIPLE_EBB. 3783 */ 3784 ebb = QTAILQ_FIRST(&s->ops); 3785 QTAILQ_FOREACH(op, &s->ops, link) { 3786 const TCGOpDef *def; 3787 int nb_oargs, nb_iargs; 3788 3789 switch (op->opc) { 3790 case INDEX_op_set_label: 3791 ebb = op; 3792 continue; 3793 case INDEX_op_discard: 3794 continue; 3795 case INDEX_op_call: 3796 nb_oargs = TCGOP_CALLO(op); 3797 nb_iargs = TCGOP_CALLI(op); 3798 break; 3799 default: 3800 def = &tcg_op_defs[op->opc]; 3801 nb_oargs = def->nb_oargs; 3802 nb_iargs = def->nb_iargs; 3803 break; 3804 } 3805 3806 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3807 TCGTemp *ts = arg_temp(op->args[i]); 3808 3809 if (ts->kind != TEMP_TB) { 3810 continue; 3811 } 3812 if (ts->state_ptr == NULL) { 3813 ts->state_ptr = ebb; 3814 } else if (ts->state_ptr != ebb) { 3815 ts->state_ptr = multiple_ebb; 3816 } 3817 } 3818 } 3819 3820 /* 3821 * For TEMP_TB that turned out not to be used beyond one EBB, 3822 * reduce the liveness to TEMP_EBB. 3823 */ 3824 for (int i = s->nb_globals; i < nb_temps; ++i) { 3825 TCGTemp *ts = &s->temps[i]; 3826 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3827 ts->kind = TEMP_EBB; 3828 } 3829 } 3830 } 3831 3832 /* Liveness analysis : update the opc_arg_life array to tell if a 3833 given input arguments is dead. Instructions updating dead 3834 temporaries are removed. */ 3835 static void __attribute__((noinline)) 3836 liveness_pass_1(TCGContext *s) 3837 { 3838 int nb_globals = s->nb_globals; 3839 int nb_temps = s->nb_temps; 3840 TCGOp *op, *op_prev; 3841 TCGRegSet *prefs; 3842 int i; 3843 3844 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3845 for (i = 0; i < nb_temps; ++i) { 3846 s->temps[i].state_ptr = prefs + i; 3847 } 3848 3849 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 3850 la_func_end(s, nb_globals, nb_temps); 3851 3852 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3853 int nb_iargs, nb_oargs; 3854 TCGOpcode opc_new, opc_new2; 3855 TCGLifeData arg_life = 0; 3856 TCGTemp *ts; 3857 TCGOpcode opc = op->opc; 3858 const TCGOpDef *def = &tcg_op_defs[opc]; 3859 const TCGArgConstraint *args_ct; 3860 3861 switch (opc) { 3862 case INDEX_op_call: 3863 { 3864 const TCGHelperInfo *info = tcg_call_info(op); 3865 int call_flags = tcg_call_flags(op); 3866 3867 nb_oargs = TCGOP_CALLO(op); 3868 nb_iargs = TCGOP_CALLI(op); 3869 3870 /* pure functions can be removed if their result is unused */ 3871 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3872 for (i = 0; i < nb_oargs; i++) { 3873 ts = arg_temp(op->args[i]); 3874 if (ts->state != TS_DEAD) { 3875 goto do_not_remove_call; 3876 } 3877 } 3878 goto do_remove; 3879 } 3880 do_not_remove_call: 3881 3882 /* Output args are dead. */ 3883 for (i = 0; i < nb_oargs; i++) { 3884 ts = arg_temp(op->args[i]); 3885 if (ts->state & TS_DEAD) { 3886 arg_life |= DEAD_ARG << i; 3887 } 3888 if (ts->state & TS_MEM) { 3889 arg_life |= SYNC_ARG << i; 3890 } 3891 ts->state = TS_DEAD; 3892 la_reset_pref(ts); 3893 } 3894 3895 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3896 memset(op->output_pref, 0, sizeof(op->output_pref)); 3897 3898 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3899 TCG_CALL_NO_READ_GLOBALS))) { 3900 la_global_kill(s, nb_globals); 3901 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3902 la_global_sync(s, nb_globals); 3903 } 3904 3905 /* Record arguments that die in this helper. */ 3906 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3907 ts = arg_temp(op->args[i]); 3908 if (ts->state & TS_DEAD) { 3909 arg_life |= DEAD_ARG << i; 3910 } 3911 } 3912 3913 /* For all live registers, remove call-clobbered prefs. */ 3914 la_cross_call(s, nb_temps); 3915 3916 /* 3917 * Input arguments are live for preceding opcodes. 3918 * 3919 * For those arguments that die, and will be allocated in 3920 * registers, clear the register set for that arg, to be 3921 * filled in below. For args that will be on the stack, 3922 * reset to any available reg. Process arguments in reverse 3923 * order so that if a temp is used more than once, the stack 3924 * reset to max happens before the register reset to 0. 3925 */ 3926 for (i = nb_iargs - 1; i >= 0; i--) { 3927 const TCGCallArgumentLoc *loc = &info->in[i]; 3928 ts = arg_temp(op->args[nb_oargs + i]); 3929 3930 if (ts->state & TS_DEAD) { 3931 switch (loc->kind) { 3932 case TCG_CALL_ARG_NORMAL: 3933 case TCG_CALL_ARG_EXTEND_U: 3934 case TCG_CALL_ARG_EXTEND_S: 3935 if (arg_slot_reg_p(loc->arg_slot)) { 3936 *la_temp_pref(ts) = 0; 3937 break; 3938 } 3939 /* fall through */ 3940 default: 3941 *la_temp_pref(ts) = 3942 tcg_target_available_regs[ts->type]; 3943 break; 3944 } 3945 ts->state &= ~TS_DEAD; 3946 } 3947 } 3948 3949 /* 3950 * For each input argument, add its input register to prefs. 3951 * If a temp is used once, this produces a single set bit; 3952 * if a temp is used multiple times, this produces a set. 
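 * For example, if one temp is passed in two different register argument
 * slots, both corresponding tcg_target_call_iarg_regs[] entries end up
 * set in its preference mask.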
3953 */ 3954 for (i = 0; i < nb_iargs; i++) { 3955 const TCGCallArgumentLoc *loc = &info->in[i]; 3956 ts = arg_temp(op->args[nb_oargs + i]); 3957 3958 switch (loc->kind) { 3959 case TCG_CALL_ARG_NORMAL: 3960 case TCG_CALL_ARG_EXTEND_U: 3961 case TCG_CALL_ARG_EXTEND_S: 3962 if (arg_slot_reg_p(loc->arg_slot)) { 3963 tcg_regset_set_reg(*la_temp_pref(ts), 3964 tcg_target_call_iarg_regs[loc->arg_slot]); 3965 } 3966 break; 3967 default: 3968 break; 3969 } 3970 } 3971 } 3972 break; 3973 case INDEX_op_insn_start: 3974 break; 3975 case INDEX_op_discard: 3976 /* mark the temporary as dead */ 3977 ts = arg_temp(op->args[0]); 3978 ts->state = TS_DEAD; 3979 la_reset_pref(ts); 3980 break; 3981 3982 case INDEX_op_add2_i32: 3983 case INDEX_op_add2_i64: 3984 opc_new = INDEX_op_add; 3985 goto do_addsub2; 3986 case INDEX_op_sub2_i32: 3987 case INDEX_op_sub2_i64: 3988 opc_new = INDEX_op_sub; 3989 do_addsub2: 3990 nb_iargs = 4; 3991 nb_oargs = 2; 3992 /* Test if the high part of the operation is dead, but not 3993 the low part. The result can be optimized to a simple 3994 add or sub. This happens often for x86_64 guest when the 3995 cpu mode is set to 32 bit. */ 3996 if (arg_temp(op->args[1])->state == TS_DEAD) { 3997 if (arg_temp(op->args[0])->state == TS_DEAD) { 3998 goto do_remove; 3999 } 4000 /* Replace the opcode and adjust the args in place, 4001 leaving 3 unused args at the end. */ 4002 op->opc = opc = opc_new; 4003 op->args[1] = op->args[2]; 4004 op->args[2] = op->args[4]; 4005 /* Fall through and mark the single-word operation live. */ 4006 nb_iargs = 2; 4007 nb_oargs = 1; 4008 } 4009 goto do_not_remove; 4010 4011 case INDEX_op_muls2: 4012 opc_new = INDEX_op_mul; 4013 opc_new2 = INDEX_op_mulsh; 4014 goto do_mul2; 4015 case INDEX_op_mulu2: 4016 opc_new = INDEX_op_mul; 4017 opc_new2 = INDEX_op_muluh; 4018 do_mul2: 4019 nb_iargs = 2; 4020 nb_oargs = 2; 4021 if (arg_temp(op->args[1])->state == TS_DEAD) { 4022 if (arg_temp(op->args[0])->state == TS_DEAD) { 4023 /* Both parts of the operation are dead. */ 4024 goto do_remove; 4025 } 4026 /* The high part of the operation is dead; generate the low. */ 4027 op->opc = opc = opc_new; 4028 op->args[1] = op->args[2]; 4029 op->args[2] = op->args[3]; 4030 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4031 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4032 /* The low part of the operation is dead; generate the high. */ 4033 op->opc = opc = opc_new2; 4034 op->args[0] = op->args[1]; 4035 op->args[1] = op->args[2]; 4036 op->args[2] = op->args[3]; 4037 } else { 4038 goto do_not_remove; 4039 } 4040 /* Mark the single-word operation live. */ 4041 nb_oargs = 1; 4042 goto do_not_remove; 4043 4044 default: 4045 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4046 nb_iargs = def->nb_iargs; 4047 nb_oargs = def->nb_oargs; 4048 4049 /* Test if the operation can be removed because all 4050 its outputs are dead. We assume that nb_oargs == 0 4051 implies side effects */ 4052 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4053 for (i = 0; i < nb_oargs; i++) { 4054 if (arg_temp(op->args[i])->state != TS_DEAD) { 4055 goto do_not_remove; 4056 } 4057 } 4058 goto do_remove; 4059 } 4060 goto do_not_remove; 4061 4062 do_remove: 4063 tcg_op_remove(s, op); 4064 break; 4065 4066 do_not_remove: 4067 for (i = 0; i < nb_oargs; i++) { 4068 ts = arg_temp(op->args[i]); 4069 4070 /* Remember the preference of the uses that followed. 
*/ 4071 if (i < ARRAY_SIZE(op->output_pref)) { 4072 op->output_pref[i] = *la_temp_pref(ts); 4073 } 4074 4075 /* Output args are dead. */ 4076 if (ts->state & TS_DEAD) { 4077 arg_life |= DEAD_ARG << i; 4078 } 4079 if (ts->state & TS_MEM) { 4080 arg_life |= SYNC_ARG << i; 4081 } 4082 ts->state = TS_DEAD; 4083 la_reset_pref(ts); 4084 } 4085 4086 /* If end of basic block, update. */ 4087 if (def->flags & TCG_OPF_BB_EXIT) { 4088 la_func_end(s, nb_globals, nb_temps); 4089 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4090 la_bb_sync(s, nb_globals, nb_temps); 4091 } else if (def->flags & TCG_OPF_BB_END) { 4092 la_bb_end(s, nb_globals, nb_temps); 4093 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4094 la_global_sync(s, nb_globals); 4095 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4096 la_cross_call(s, nb_temps); 4097 } 4098 } 4099 4100 /* Record arguments that die in this opcode. */ 4101 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4102 ts = arg_temp(op->args[i]); 4103 if (ts->state & TS_DEAD) { 4104 arg_life |= DEAD_ARG << i; 4105 } 4106 } 4107 4108 /* Input arguments are live for preceding opcodes. */ 4109 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4110 ts = arg_temp(op->args[i]); 4111 if (ts->state & TS_DEAD) { 4112 /* For operands that were dead, initially allow 4113 all regs for the type. */ 4114 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4115 ts->state &= ~TS_DEAD; 4116 } 4117 } 4118 4119 /* Incorporate constraints for this operand. */ 4120 switch (opc) { 4121 case INDEX_op_mov: 4122 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4123 have proper constraints. That said, special case 4124 moves to propagate preferences backward. */ 4125 if (IS_DEAD_ARG(1)) { 4126 *la_temp_pref(arg_temp(op->args[0])) 4127 = *la_temp_pref(arg_temp(op->args[1])); 4128 } 4129 break; 4130 4131 default: 4132 args_ct = opcode_args_ct(op); 4133 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4134 const TCGArgConstraint *ct = &args_ct[i]; 4135 TCGRegSet set, *pset; 4136 4137 ts = arg_temp(op->args[i]); 4138 pset = la_temp_pref(ts); 4139 set = *pset; 4140 4141 set &= ct->regs; 4142 if (ct->ialias) { 4143 set &= output_pref(op, ct->alias_index); 4144 } 4145 /* If the combination is not possible, restart. */ 4146 if (set == 0) { 4147 set = ct->regs; 4148 } 4149 *pset = set; 4150 } 4151 break; 4152 } 4153 break; 4154 } 4155 op->life = arg_life; 4156 } 4157 } 4158 4159 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4160 static bool __attribute__((noinline)) 4161 liveness_pass_2(TCGContext *s) 4162 { 4163 int nb_globals = s->nb_globals; 4164 int nb_temps, i; 4165 bool changes = false; 4166 TCGOp *op, *op_next; 4167 4168 /* Create a temporary for each indirect global. */ 4169 for (i = 0; i < nb_globals; ++i) { 4170 TCGTemp *its = &s->temps[i]; 4171 if (its->indirect_reg) { 4172 TCGTemp *dts = tcg_temp_alloc(s); 4173 dts->type = its->type; 4174 dts->base_type = its->base_type; 4175 dts->temp_subindex = its->temp_subindex; 4176 dts->kind = TEMP_EBB; 4177 its->state_ptr = dts; 4178 } else { 4179 its->state_ptr = NULL; 4180 } 4181 /* All globals begin dead. 
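           Until first loaded, their values live only in their canonical
           memory slots.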
*/ 4182 its->state = TS_DEAD; 4183 } 4184 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4185 TCGTemp *its = &s->temps[i]; 4186 its->state_ptr = NULL; 4187 its->state = TS_DEAD; 4188 } 4189 4190 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4191 TCGOpcode opc = op->opc; 4192 const TCGOpDef *def = &tcg_op_defs[opc]; 4193 TCGLifeData arg_life = op->life; 4194 int nb_iargs, nb_oargs, call_flags; 4195 TCGTemp *arg_ts, *dir_ts; 4196 4197 if (opc == INDEX_op_call) { 4198 nb_oargs = TCGOP_CALLO(op); 4199 nb_iargs = TCGOP_CALLI(op); 4200 call_flags = tcg_call_flags(op); 4201 } else { 4202 nb_iargs = def->nb_iargs; 4203 nb_oargs = def->nb_oargs; 4204 4205 /* Set flags similar to how calls require. */ 4206 if (def->flags & TCG_OPF_COND_BRANCH) { 4207 /* Like reading globals: sync_globals */ 4208 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4209 } else if (def->flags & TCG_OPF_BB_END) { 4210 /* Like writing globals: save_globals */ 4211 call_flags = 0; 4212 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4213 /* Like reading globals: sync_globals */ 4214 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4215 } else { 4216 /* No effect on globals. */ 4217 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4218 TCG_CALL_NO_WRITE_GLOBALS); 4219 } 4220 } 4221 4222 /* Make sure that input arguments are available. */ 4223 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4224 arg_ts = arg_temp(op->args[i]); 4225 dir_ts = arg_ts->state_ptr; 4226 if (dir_ts && arg_ts->state == TS_DEAD) { 4227 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4228 ? INDEX_op_ld_i32 4229 : INDEX_op_ld_i64); 4230 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4231 arg_ts->type, 3); 4232 4233 lop->args[0] = temp_arg(dir_ts); 4234 lop->args[1] = temp_arg(arg_ts->mem_base); 4235 lop->args[2] = arg_ts->mem_offset; 4236 4237 /* Loaded, but synced with memory. */ 4238 arg_ts->state = TS_MEM; 4239 } 4240 } 4241 4242 /* Perform input replacement, and mark inputs that became dead. 4243 No action is required except keeping temp_state up to date 4244 so that we reload when needed. */ 4245 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4246 arg_ts = arg_temp(op->args[i]); 4247 dir_ts = arg_ts->state_ptr; 4248 if (dir_ts) { 4249 op->args[i] = temp_arg(dir_ts); 4250 changes = true; 4251 if (IS_DEAD_ARG(i)) { 4252 arg_ts->state = TS_DEAD; 4253 } 4254 } 4255 } 4256 4257 /* Liveness analysis should ensure that the following are 4258 all correct, for call sites and basic block end points. */ 4259 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4260 /* Nothing to do */ 4261 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4262 for (i = 0; i < nb_globals; ++i) { 4263 /* Liveness should see that globals are synced back, 4264 that is, either TS_DEAD or TS_MEM. */ 4265 arg_ts = &s->temps[i]; 4266 tcg_debug_assert(arg_ts->state_ptr == 0 4267 || arg_ts->state != 0); 4268 } 4269 } else { 4270 for (i = 0; i < nb_globals; ++i) { 4271 /* Liveness should see that globals are saved back, 4272 that is, TS_DEAD, waiting to be reloaded. */ 4273 arg_ts = &s->temps[i]; 4274 tcg_debug_assert(arg_ts->state_ptr == 0 4275 || arg_ts->state == TS_DEAD); 4276 } 4277 } 4278 4279 /* Outputs become available. */ 4280 if (opc == INDEX_op_mov) { 4281 arg_ts = arg_temp(op->args[0]); 4282 dir_ts = arg_ts->state_ptr; 4283 if (dir_ts) { 4284 op->args[0] = temp_arg(dir_ts); 4285 changes = true; 4286 4287 /* The output is now live and modified. */ 4288 arg_ts->state = 0; 4289 4290 if (NEED_SYNC_ARG(0)) { 4291 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4292 ? 
INDEX_op_st_i32 4293 : INDEX_op_st_i64); 4294 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4295 arg_ts->type, 3); 4296 TCGTemp *out_ts = dir_ts; 4297 4298 if (IS_DEAD_ARG(0)) { 4299 out_ts = arg_temp(op->args[1]); 4300 arg_ts->state = TS_DEAD; 4301 tcg_op_remove(s, op); 4302 } else { 4303 arg_ts->state = TS_MEM; 4304 } 4305 4306 sop->args[0] = temp_arg(out_ts); 4307 sop->args[1] = temp_arg(arg_ts->mem_base); 4308 sop->args[2] = arg_ts->mem_offset; 4309 } else { 4310 tcg_debug_assert(!IS_DEAD_ARG(0)); 4311 } 4312 } 4313 } else { 4314 for (i = 0; i < nb_oargs; i++) { 4315 arg_ts = arg_temp(op->args[i]); 4316 dir_ts = arg_ts->state_ptr; 4317 if (!dir_ts) { 4318 continue; 4319 } 4320 op->args[i] = temp_arg(dir_ts); 4321 changes = true; 4322 4323 /* The output is now live and modified. */ 4324 arg_ts->state = 0; 4325 4326 /* Sync outputs upon their last write. */ 4327 if (NEED_SYNC_ARG(i)) { 4328 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4329 ? INDEX_op_st_i32 4330 : INDEX_op_st_i64); 4331 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4332 arg_ts->type, 3); 4333 4334 sop->args[0] = temp_arg(dir_ts); 4335 sop->args[1] = temp_arg(arg_ts->mem_base); 4336 sop->args[2] = arg_ts->mem_offset; 4337 4338 arg_ts->state = TS_MEM; 4339 } 4340 /* Drop outputs that are dead. */ 4341 if (IS_DEAD_ARG(i)) { 4342 arg_ts->state = TS_DEAD; 4343 } 4344 } 4345 } 4346 } 4347 4348 return changes; 4349 } 4350 4351 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4352 { 4353 intptr_t off; 4354 int size, align; 4355 4356 /* When allocating an object, look at the full type. */ 4357 size = tcg_type_size(ts->base_type); 4358 switch (ts->base_type) { 4359 case TCG_TYPE_I32: 4360 align = 4; 4361 break; 4362 case TCG_TYPE_I64: 4363 case TCG_TYPE_V64: 4364 align = 8; 4365 break; 4366 case TCG_TYPE_I128: 4367 case TCG_TYPE_V128: 4368 case TCG_TYPE_V256: 4369 /* 4370 * Note that we do not require aligned storage for V256, 4371 * and that we provide alignment for I128 to match V128, 4372 * even if that's above what the host ABI requires. 4373 */ 4374 align = 16; 4375 break; 4376 default: 4377 g_assert_not_reached(); 4378 } 4379 4380 /* 4381 * Assume the stack is sufficiently aligned. 4382 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4383 * and do not require 16 byte vector alignment. This seems slightly 4384 * easier than fully parameterizing the above switch statement. 4385 */ 4386 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4387 off = ROUND_UP(s->current_frame_offset, align); 4388 4389 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4390 if (off + size > s->frame_end) { 4391 tcg_raise_tb_overflow(s); 4392 } 4393 s->current_frame_offset = off + size; 4394 #if defined(__sparc__) 4395 off += TCG_TARGET_STACK_BIAS; 4396 #endif 4397 4398 /* If the object was subdivided, assign memory to all the parts. */ 4399 if (ts->base_type != ts->type) { 4400 int part_size = tcg_type_size(ts->type); 4401 int part_count = size / part_size; 4402 4403 /* 4404 * Each part is allocated sequentially in tcg_temp_new_internal. 4405 * Jump back to the first part by subtracting the current index. 4406 */ 4407 ts -= ts->temp_subindex; 4408 for (int i = 0; i < part_count; ++i) { 4409 ts[i].mem_offset = off + i * part_size; 4410 ts[i].mem_base = s->frame_temp; 4411 ts[i].mem_allocated = 1; 4412 } 4413 } else { 4414 ts->mem_offset = off; 4415 ts->mem_base = s->frame_temp; 4416 ts->mem_allocated = 1; 4417 } 4418 } 4419 4420 /* Assign @reg to @ts, and update reg_to_temp[]. 
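   Any previous register association for @ts is removed first.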
 */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly. */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
4538 * @allocated_regs: Set of registers which must be avoided. 4539 * @preferred_regs: Set of registers we should prefer. 4540 * @rev: True if we search the registers in "indirect" order. 4541 * 4542 * The allocated register must be in @required_regs & ~@allocated_regs, 4543 * but if we can put it in @preferred_regs we may save a move later. 4544 */ 4545 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4546 TCGRegSet allocated_regs, 4547 TCGRegSet preferred_regs, bool rev) 4548 { 4549 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4550 TCGRegSet reg_ct[2]; 4551 const int *order; 4552 4553 reg_ct[1] = required_regs & ~allocated_regs; 4554 tcg_debug_assert(reg_ct[1] != 0); 4555 reg_ct[0] = reg_ct[1] & preferred_regs; 4556 4557 /* Skip the preferred_regs option if it cannot be satisfied, 4558 or if the preference made no difference. */ 4559 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4560 4561 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4562 4563 /* Try free registers, preferences first. */ 4564 for (j = f; j < 2; j++) { 4565 TCGRegSet set = reg_ct[j]; 4566 4567 if (tcg_regset_single(set)) { 4568 /* One register in the set. */ 4569 TCGReg reg = tcg_regset_first(set); 4570 if (s->reg_to_temp[reg] == NULL) { 4571 return reg; 4572 } 4573 } else { 4574 for (i = 0; i < n; i++) { 4575 TCGReg reg = order[i]; 4576 if (s->reg_to_temp[reg] == NULL && 4577 tcg_regset_test_reg(set, reg)) { 4578 return reg; 4579 } 4580 } 4581 } 4582 } 4583 4584 /* We must spill something. */ 4585 for (j = f; j < 2; j++) { 4586 TCGRegSet set = reg_ct[j]; 4587 4588 if (tcg_regset_single(set)) { 4589 /* One register in the set. */ 4590 TCGReg reg = tcg_regset_first(set); 4591 tcg_reg_free(s, reg, allocated_regs); 4592 return reg; 4593 } else { 4594 for (i = 0; i < n; i++) { 4595 TCGReg reg = order[i]; 4596 if (tcg_regset_test_reg(set, reg)) { 4597 tcg_reg_free(s, reg, allocated_regs); 4598 return reg; 4599 } 4600 } 4601 } 4602 } 4603 4604 g_assert_not_reached(); 4605 } 4606 4607 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4608 TCGRegSet allocated_regs, 4609 TCGRegSet preferred_regs, bool rev) 4610 { 4611 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4612 TCGRegSet reg_ct[2]; 4613 const int *order; 4614 4615 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4616 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4617 tcg_debug_assert(reg_ct[1] != 0); 4618 reg_ct[0] = reg_ct[1] & preferred_regs; 4619 4620 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4621 4622 /* 4623 * Skip the preferred_regs option if it cannot be satisfied, 4624 * or if the preference made no difference. 4625 */ 4626 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4627 4628 /* 4629 * Minimize the number of flushes by looking for 2 free registers first, 4630 * then a single flush, then two flushes. 4631 */ 4632 for (fmin = 2; fmin >= 0; fmin--) { 4633 for (j = k; j < 2; j++) { 4634 TCGRegSet set = reg_ct[j]; 4635 4636 for (i = 0; i < n; i++) { 4637 TCGReg reg = order[i]; 4638 4639 if (tcg_regset_test_reg(set, reg)) { 4640 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4641 if (f >= fmin) { 4642 tcg_reg_free(s, reg, allocated_regs); 4643 tcg_reg_free(s, reg + 1, allocated_regs); 4644 return reg; 4645 } 4646 } 4647 } 4648 } 4649 } 4650 g_assert_not_reached(); 4651 } 4652 4653 /* Make sure the temporary is in a register. 
   If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * so do it generically here.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety.
*/ 4757 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4758 break; 4759 case TEMP_CONST: 4760 /* Similarly, we should have freed any allocated register. */ 4761 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4762 break; 4763 default: 4764 g_assert_not_reached(); 4765 } 4766 } 4767 4768 save_globals(s, allocated_regs); 4769 } 4770 4771 /* 4772 * At a conditional branch, we assume all temporaries are dead unless 4773 * explicitly live-across-conditional-branch; all globals and local 4774 * temps are synced to their location. 4775 */ 4776 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4777 { 4778 sync_globals(s, allocated_regs); 4779 4780 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4781 TCGTemp *ts = &s->temps[i]; 4782 /* 4783 * The liveness analysis already ensures that temps are dead. 4784 * Keep tcg_debug_asserts for safety. 4785 */ 4786 switch (ts->kind) { 4787 case TEMP_TB: 4788 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4789 break; 4790 case TEMP_EBB: 4791 case TEMP_CONST: 4792 break; 4793 default: 4794 g_assert_not_reached(); 4795 } 4796 } 4797 } 4798 4799 /* 4800 * Specialized code generation for INDEX_op_mov_* with a constant. 4801 */ 4802 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4803 tcg_target_ulong val, TCGLifeData arg_life, 4804 TCGRegSet preferred_regs) 4805 { 4806 /* ENV should not be modified. */ 4807 tcg_debug_assert(!temp_readonly(ots)); 4808 4809 /* The movi is not explicitly generated here. */ 4810 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4811 ots->val = val; 4812 ots->mem_coherent = 0; 4813 if (NEED_SYNC_ARG(0)) { 4814 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4815 } else if (IS_DEAD_ARG(0)) { 4816 temp_dead(s, ots); 4817 } 4818 } 4819 4820 /* 4821 * Specialized code generation for INDEX_op_mov_*. 4822 */ 4823 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4824 { 4825 const TCGLifeData arg_life = op->life; 4826 TCGRegSet allocated_regs, preferred_regs; 4827 TCGTemp *ts, *ots; 4828 TCGType otype, itype; 4829 TCGReg oreg, ireg; 4830 4831 allocated_regs = s->reserved_regs; 4832 preferred_regs = output_pref(op, 0); 4833 ots = arg_temp(op->args[0]); 4834 ts = arg_temp(op->args[1]); 4835 4836 /* ENV should not be modified. */ 4837 tcg_debug_assert(!temp_readonly(ots)); 4838 4839 /* Note that otype != itype for no-op truncation. */ 4840 otype = ots->type; 4841 itype = ts->type; 4842 4843 if (ts->val_type == TEMP_VAL_CONST) { 4844 /* propagate constant or generate sti */ 4845 tcg_target_ulong val = ts->val; 4846 if (IS_DEAD_ARG(1)) { 4847 temp_dead(s, ts); 4848 } 4849 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4850 return; 4851 } 4852 4853 /* If the source value is in memory we're going to be forced 4854 to have it in a register in order to perform the copy. Copy 4855 the SOURCE value into its own register first, that way we 4856 don't have to reload SOURCE the next time it is used. */ 4857 if (ts->val_type == TEMP_VAL_MEM) { 4858 temp_load(s, ts, tcg_target_available_regs[itype], 4859 allocated_regs, preferred_regs); 4860 } 4861 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4862 ireg = ts->reg; 4863 4864 if (IS_DEAD_ARG(0)) { 4865 /* mov to a non-saved dead register makes no sense (even with 4866 liveness analysis disabled). 
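           The output must therefore at least be synced to its memory slot.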
 */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ots->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    const TCGArgConstraint *dup_args_ct;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_TYPE(op);

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_args_ct = opcode_args_ct(op);
    dup_out_regs = dup_args_ct[0].regs;
    dup_in_regs = dup_args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
4978 */ 4979 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4980 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4981 goto done; 4982 } 4983 /* Try again from memory or a vector input register. */ 4984 } 4985 if (!its->mem_coherent) { 4986 /* 4987 * The input register is not synced, and so an extra store 4988 * would be required to use memory. Attempt an integer-vector 4989 * register move first. We do not have a TCGRegSet for this. 4990 */ 4991 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4992 break; 4993 } 4994 /* Sync the temp back to its slot and load from there. */ 4995 temp_sync(s, its, s->reserved_regs, 0, 0); 4996 } 4997 /* fall through */ 4998 4999 case TEMP_VAL_MEM: 5000 lowpart_ofs = 0; 5001 if (HOST_BIG_ENDIAN) { 5002 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5003 } 5004 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5005 its->mem_offset + lowpart_ofs)) { 5006 goto done; 5007 } 5008 /* Load the input into the destination vector register. */ 5009 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5010 break; 5011 5012 default: 5013 g_assert_not_reached(); 5014 } 5015 5016 /* We now have a vector input register, so dup must succeed. */ 5017 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5018 tcg_debug_assert(ok); 5019 5020 done: 5021 ots->mem_coherent = 0; 5022 if (IS_DEAD_ARG(1)) { 5023 temp_dead(s, its); 5024 } 5025 if (NEED_SYNC_ARG(0)) { 5026 temp_sync(s, ots, s->reserved_regs, 0, 0); 5027 } 5028 if (IS_DEAD_ARG(0)) { 5029 temp_dead(s, ots); 5030 } 5031 } 5032 5033 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5034 { 5035 const TCGLifeData arg_life = op->life; 5036 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5037 TCGRegSet i_allocated_regs; 5038 TCGRegSet o_allocated_regs; 5039 int i, k, nb_iargs, nb_oargs; 5040 TCGReg reg; 5041 TCGArg arg; 5042 const TCGArgConstraint *args_ct; 5043 const TCGArgConstraint *arg_ct; 5044 TCGTemp *ts; 5045 TCGArg new_args[TCG_MAX_OP_ARGS]; 5046 int const_args[TCG_MAX_OP_ARGS]; 5047 TCGCond op_cond; 5048 5049 nb_oargs = def->nb_oargs; 5050 nb_iargs = def->nb_iargs; 5051 5052 /* copy constants */ 5053 memcpy(new_args + nb_oargs + nb_iargs, 5054 op->args + nb_oargs + nb_iargs, 5055 sizeof(TCGArg) * def->nb_cargs); 5056 5057 i_allocated_regs = s->reserved_regs; 5058 o_allocated_regs = s->reserved_regs; 5059 5060 switch (op->opc) { 5061 case INDEX_op_brcond_i32: 5062 case INDEX_op_brcond_i64: 5063 op_cond = op->args[2]; 5064 break; 5065 case INDEX_op_setcond: 5066 case INDEX_op_negsetcond: 5067 case INDEX_op_cmp_vec: 5068 op_cond = op->args[3]; 5069 break; 5070 case INDEX_op_brcond2_i32: 5071 op_cond = op->args[4]; 5072 break; 5073 case INDEX_op_movcond_i32: 5074 case INDEX_op_movcond_i64: 5075 case INDEX_op_setcond2_i32: 5076 case INDEX_op_cmpsel_vec: 5077 op_cond = op->args[5]; 5078 break; 5079 default: 5080 /* No condition within opcode. */ 5081 op_cond = TCG_COND_ALWAYS; 5082 break; 5083 } 5084 5085 args_ct = opcode_args_ct(op); 5086 5087 /* satisfy input constraints */ 5088 for (k = 0; k < nb_iargs; k++) { 5089 TCGRegSet i_preferred_regs, i_required_regs; 5090 bool allocate_new_reg, copyto_new_reg; 5091 TCGTemp *ts2; 5092 int i1, i2; 5093 5094 i = args_ct[nb_oargs + k].sort_index; 5095 arg = op->args[i]; 5096 arg_ct = &args_ct[i]; 5097 ts = arg_temp(arg); 5098 5099 if (ts->val_type == TEMP_VAL_CONST) { 5100 #ifdef TCG_REG_ZERO 5101 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5102 /* Hardware zero register: indicate register via non-const. 
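                   The operand then reads as zero directly from that register
                   (e.g. the MIPS or RISC-V zero register), with no constant
                   load emitted.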
*/ 5103 const_args[i] = 0; 5104 new_args[i] = TCG_REG_ZERO; 5105 continue; 5106 } 5107 #endif 5108 5109 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5110 op_cond, TCGOP_VECE(op))) { 5111 /* constant is OK for instruction */ 5112 const_args[i] = 1; 5113 new_args[i] = ts->val; 5114 continue; 5115 } 5116 } 5117 5118 reg = ts->reg; 5119 i_preferred_regs = 0; 5120 i_required_regs = arg_ct->regs; 5121 allocate_new_reg = false; 5122 copyto_new_reg = false; 5123 5124 switch (arg_ct->pair) { 5125 case 0: /* not paired */ 5126 if (arg_ct->ialias) { 5127 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5128 5129 /* 5130 * If the input is readonly, then it cannot also be an 5131 * output and aliased to itself. If the input is not 5132 * dead after the instruction, we must allocate a new 5133 * register and move it. 5134 */ 5135 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5136 || args_ct[arg_ct->alias_index].newreg) { 5137 allocate_new_reg = true; 5138 } else if (ts->val_type == TEMP_VAL_REG) { 5139 /* 5140 * Check if the current register has already been 5141 * allocated for another input. 5142 */ 5143 allocate_new_reg = 5144 tcg_regset_test_reg(i_allocated_regs, reg); 5145 } 5146 } 5147 if (!allocate_new_reg) { 5148 temp_load(s, ts, i_required_regs, i_allocated_regs, 5149 i_preferred_regs); 5150 reg = ts->reg; 5151 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5152 } 5153 if (allocate_new_reg) { 5154 /* 5155 * Allocate a new register matching the constraint 5156 * and move the temporary register into it. 5157 */ 5158 temp_load(s, ts, tcg_target_available_regs[ts->type], 5159 i_allocated_regs, 0); 5160 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5161 i_preferred_regs, ts->indirect_base); 5162 copyto_new_reg = true; 5163 } 5164 break; 5165 5166 case 1: 5167 /* First of an input pair; if i1 == i2, the second is an output. */ 5168 i1 = i; 5169 i2 = arg_ct->pair_index; 5170 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5171 5172 /* 5173 * It is easier to default to allocating a new pair 5174 * and to identify a few cases where it's not required. 5175 */ 5176 if (arg_ct->ialias) { 5177 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5178 if (IS_DEAD_ARG(i1) && 5179 IS_DEAD_ARG(i2) && 5180 !temp_readonly(ts) && 5181 ts->val_type == TEMP_VAL_REG && 5182 ts->reg < TCG_TARGET_NB_REGS - 1 && 5183 tcg_regset_test_reg(i_required_regs, reg) && 5184 !tcg_regset_test_reg(i_allocated_regs, reg) && 5185 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5186 (ts2 5187 ? ts2->val_type == TEMP_VAL_REG && 5188 ts2->reg == reg + 1 && 5189 !temp_readonly(ts2) 5190 : s->reg_to_temp[reg + 1] == NULL)) { 5191 break; 5192 } 5193 } else { 5194 /* Without aliasing, the pair must also be an input. 
*/ 5195 tcg_debug_assert(ts2); 5196 if (ts->val_type == TEMP_VAL_REG && 5197 ts2->val_type == TEMP_VAL_REG && 5198 ts2->reg == reg + 1 && 5199 tcg_regset_test_reg(i_required_regs, reg)) { 5200 break; 5201 } 5202 } 5203 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5204 0, ts->indirect_base); 5205 goto do_pair; 5206 5207 case 2: /* pair second */ 5208 reg = new_args[arg_ct->pair_index] + 1; 5209 goto do_pair; 5210 5211 case 3: /* ialias with second output, no first input */ 5212 tcg_debug_assert(arg_ct->ialias); 5213 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5214 5215 if (IS_DEAD_ARG(i) && 5216 !temp_readonly(ts) && 5217 ts->val_type == TEMP_VAL_REG && 5218 reg > 0 && 5219 s->reg_to_temp[reg - 1] == NULL && 5220 tcg_regset_test_reg(i_required_regs, reg) && 5221 !tcg_regset_test_reg(i_allocated_regs, reg) && 5222 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5223 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5224 break; 5225 } 5226 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5227 i_allocated_regs, 0, 5228 ts->indirect_base); 5229 tcg_regset_set_reg(i_allocated_regs, reg); 5230 reg += 1; 5231 goto do_pair; 5232 5233 do_pair: 5234 /* 5235 * If an aliased input is not dead after the instruction, 5236 * we must allocate a new register and move it. 5237 */ 5238 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5239 TCGRegSet t_allocated_regs = i_allocated_regs; 5240 5241 /* 5242 * Because of the alias, and the continued life, make sure 5243 * that the temp is somewhere *other* than the reg pair, 5244 * and we get a copy in reg. 5245 */ 5246 tcg_regset_set_reg(t_allocated_regs, reg); 5247 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5248 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5249 /* If ts was already in reg, copy it somewhere else. */ 5250 TCGReg nr; 5251 bool ok; 5252 5253 tcg_debug_assert(ts->kind != TEMP_FIXED); 5254 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5255 t_allocated_regs, 0, ts->indirect_base); 5256 ok = tcg_out_mov(s, ts->type, nr, reg); 5257 tcg_debug_assert(ok); 5258 5259 set_temp_val_reg(s, ts, nr); 5260 } else { 5261 temp_load(s, ts, tcg_target_available_regs[ts->type], 5262 t_allocated_regs, 0); 5263 copyto_new_reg = true; 5264 } 5265 } else { 5266 /* Preferably allocate to reg, otherwise copy. */ 5267 i_required_regs = (TCGRegSet)1 << reg; 5268 temp_load(s, ts, i_required_regs, i_allocated_regs, 5269 i_preferred_regs); 5270 copyto_new_reg = ts->reg != reg; 5271 } 5272 break; 5273 5274 default: 5275 g_assert_not_reached(); 5276 } 5277 5278 if (copyto_new_reg) { 5279 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5280 /* 5281 * Cross register class move not supported. Sync the 5282 * temp back to its slot and load from there. 5283 */ 5284 temp_sync(s, ts, i_allocated_regs, 0, 0); 5285 tcg_out_ld(s, ts->type, reg, 5286 ts->mem_base->reg, ts->mem_offset); 5287 } 5288 } 5289 new_args[i] = reg; 5290 const_args[i] = 0; 5291 tcg_regset_set_reg(i_allocated_regs, reg); 5292 } 5293 5294 /* mark dead temporaries and free the associated registers */ 5295 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5296 if (IS_DEAD_ARG(i)) { 5297 temp_dead(s, arg_temp(op->args[i])); 5298 } 5299 } 5300 5301 if (def->flags & TCG_OPF_COND_BRANCH) { 5302 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5303 } else if (def->flags & TCG_OPF_BB_END) { 5304 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5305 } else { 5306 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5307 /* XXX: permit generic clobber register list ? 
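               For now, each call-clobbered register is flushed individually
               by the loop below.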
*/ 5308 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5309 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5310 tcg_reg_free(s, i, i_allocated_regs); 5311 } 5312 } 5313 } 5314 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5315 /* sync globals if the op has side effects and might trigger 5316 an exception. */ 5317 sync_globals(s, i_allocated_regs); 5318 } 5319 5320 /* satisfy the output constraints */ 5321 for (k = 0; k < nb_oargs; k++) { 5322 i = args_ct[k].sort_index; 5323 arg = op->args[i]; 5324 arg_ct = &args_ct[i]; 5325 ts = arg_temp(arg); 5326 5327 /* ENV should not be modified. */ 5328 tcg_debug_assert(!temp_readonly(ts)); 5329 5330 switch (arg_ct->pair) { 5331 case 0: /* not paired */ 5332 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5333 reg = new_args[arg_ct->alias_index]; 5334 } else if (arg_ct->newreg) { 5335 reg = tcg_reg_alloc(s, arg_ct->regs, 5336 i_allocated_regs | o_allocated_regs, 5337 output_pref(op, k), ts->indirect_base); 5338 } else { 5339 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5340 output_pref(op, k), ts->indirect_base); 5341 } 5342 break; 5343 5344 case 1: /* first of pair */ 5345 if (arg_ct->oalias) { 5346 reg = new_args[arg_ct->alias_index]; 5347 } else if (arg_ct->newreg) { 5348 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5349 i_allocated_regs | o_allocated_regs, 5350 output_pref(op, k), 5351 ts->indirect_base); 5352 } else { 5353 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5354 output_pref(op, k), 5355 ts->indirect_base); 5356 } 5357 break; 5358 5359 case 2: /* second of pair */ 5360 if (arg_ct->oalias) { 5361 reg = new_args[arg_ct->alias_index]; 5362 } else { 5363 reg = new_args[arg_ct->pair_index] + 1; 5364 } 5365 break; 5366 5367 case 3: /* first of pair, aliasing with a second input */ 5368 tcg_debug_assert(!arg_ct->newreg); 5369 reg = new_args[arg_ct->pair_index] - 1; 5370 break; 5371 5372 default: 5373 g_assert_not_reached(); 5374 } 5375 tcg_regset_set_reg(o_allocated_regs, reg); 5376 set_temp_val_reg(s, ts, reg); 5377 ts->mem_coherent = 0; 5378 new_args[i] = reg; 5379 } 5380 } 5381 5382 /* emit instruction */ 5383 TCGType type = TCGOP_TYPE(op); 5384 switch (op->opc) { 5385 case INDEX_op_ext_i32_i64: 5386 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5387 break; 5388 case INDEX_op_extu_i32_i64: 5389 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5390 break; 5391 case INDEX_op_extrl_i64_i32: 5392 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5393 break; 5394 5395 case INDEX_op_add: 5396 case INDEX_op_and: 5397 case INDEX_op_andc: 5398 case INDEX_op_clz: 5399 case INDEX_op_ctz: 5400 case INDEX_op_divs: 5401 case INDEX_op_divu: 5402 case INDEX_op_eqv: 5403 case INDEX_op_mul: 5404 case INDEX_op_mulsh: 5405 case INDEX_op_muluh: 5406 case INDEX_op_nand: 5407 case INDEX_op_nor: 5408 case INDEX_op_or: 5409 case INDEX_op_orc: 5410 case INDEX_op_rems: 5411 case INDEX_op_remu: 5412 case INDEX_op_rotl: 5413 case INDEX_op_rotr: 5414 case INDEX_op_sar: 5415 case INDEX_op_shl: 5416 case INDEX_op_shr: 5417 case INDEX_op_xor: 5418 { 5419 const TCGOutOpBinary *out = 5420 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5421 5422 /* Constants should never appear in the first source operand. 
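                   Their first-source constraints accept only registers, so
                   any constant was already loaded into a register above.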
*/ 5423 tcg_debug_assert(!const_args[1]); 5424 if (const_args[2]) { 5425 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5426 } else { 5427 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5428 } 5429 } 5430 break; 5431 5432 case INDEX_op_sub: 5433 { 5434 const TCGOutOpSubtract *out = &outop_sub; 5435 5436 /* 5437 * Constants should never appear in the second source operand. 5438 * These are folded to add with negative constant. 5439 */ 5440 tcg_debug_assert(!const_args[2]); 5441 if (const_args[1]) { 5442 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5443 } else { 5444 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5445 } 5446 } 5447 break; 5448 5449 case INDEX_op_ctpop: 5450 case INDEX_op_neg: 5451 case INDEX_op_not: 5452 { 5453 const TCGOutOpUnary *out = 5454 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5455 5456 /* Constants should have been folded. */ 5457 tcg_debug_assert(!const_args[1]); 5458 out->out_rr(s, type, new_args[0], new_args[1]); 5459 } 5460 break; 5461 5462 case INDEX_op_divs2: 5463 case INDEX_op_divu2: 5464 { 5465 const TCGOutOpDivRem *out = 5466 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5467 5468 /* Only used by x86 and s390x, which use matching constraints. */ 5469 tcg_debug_assert(new_args[0] == new_args[2]); 5470 tcg_debug_assert(new_args[1] == new_args[3]); 5471 tcg_debug_assert(!const_args[4]); 5472 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5473 } 5474 break; 5475 5476 case INDEX_op_muls2: 5477 case INDEX_op_mulu2: 5478 { 5479 const TCGOutOpMul2 *out = 5480 container_of(all_outop[op->opc], TCGOutOpMul2, base); 5481 5482 tcg_debug_assert(!const_args[2]); 5483 tcg_debug_assert(!const_args[3]); 5484 out->out_rrrr(s, type, new_args[0], new_args[1], 5485 new_args[2], new_args[3]); 5486 } 5487 break; 5488 5489 case INDEX_op_setcond: 5490 case INDEX_op_negsetcond: 5491 { 5492 const TCGOutOpSetcond *out = 5493 container_of(all_outop[op->opc], TCGOutOpSetcond, base); 5494 TCGCond cond = new_args[3]; 5495 5496 tcg_debug_assert(!const_args[1]); 5497 if (const_args[2]) { 5498 out->out_rri(s, type, cond, 5499 new_args[0], new_args[1], new_args[2]); 5500 } else { 5501 out->out_rrr(s, type, cond, 5502 new_args[0], new_args[1], new_args[2]); 5503 } 5504 } 5505 break; 5506 5507 default: 5508 if (def->flags & TCG_OPF_VECTOR) { 5509 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5510 TCGOP_VECE(op), new_args, const_args); 5511 } else { 5512 tcg_out_op(s, op->opc, type, new_args, const_args); 5513 } 5514 break; 5515 } 5516 5517 /* move the outputs in the correct register if needed */ 5518 for(i = 0; i < nb_oargs; i++) { 5519 ts = arg_temp(op->args[i]); 5520 5521 /* ENV should not be modified. */ 5522 tcg_debug_assert(!temp_readonly(ts)); 5523 5524 if (NEED_SYNC_ARG(i)) { 5525 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5526 } else if (IS_DEAD_ARG(i)) { 5527 temp_dead(s, ts); 5528 } 5529 } 5530 } 5531 5532 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5533 { 5534 const TCGLifeData arg_life = op->life; 5535 TCGTemp *ots, *itsl, *itsh; 5536 TCGType vtype = TCGOP_TYPE(op); 5537 5538 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5539 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5540 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5541 5542 ots = arg_temp(op->args[0]); 5543 itsl = arg_temp(op->args[1]); 5544 itsh = arg_temp(op->args[2]); 5545 5546 /* ENV should not be modified. 
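       temp_readonly() covers TEMP_FIXED temps such as env as well as
       TEMP_CONST temps.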
*/ 5547 tcg_debug_assert(!temp_readonly(ots)); 5548 5549 /* Allocate the output register now. */ 5550 if (ots->val_type != TEMP_VAL_REG) { 5551 TCGRegSet allocated_regs = s->reserved_regs; 5552 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5553 TCGReg oreg; 5554 5555 /* Make sure to not spill the input registers. */ 5556 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5557 tcg_regset_set_reg(allocated_regs, itsl->reg); 5558 } 5559 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5560 tcg_regset_set_reg(allocated_regs, itsh->reg); 5561 } 5562 5563 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5564 output_pref(op, 0), ots->indirect_base); 5565 set_temp_val_reg(s, ots, oreg); 5566 } 5567 5568 /* Promote dup2 of immediates to dupi_vec. */ 5569 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5570 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5571 MemOp vece = MO_64; 5572 5573 if (val == dup_const(MO_8, val)) { 5574 vece = MO_8; 5575 } else if (val == dup_const(MO_16, val)) { 5576 vece = MO_16; 5577 } else if (val == dup_const(MO_32, val)) { 5578 vece = MO_32; 5579 } 5580 5581 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5582 goto done; 5583 } 5584 5585 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5586 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5587 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5588 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5589 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5590 5591 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5592 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5593 5594 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5595 its->mem_base->reg, its->mem_offset)) { 5596 goto done; 5597 } 5598 } 5599 5600 /* Fall back to generic expansion. */ 5601 return false; 5602 5603 done: 5604 ots->mem_coherent = 0; 5605 if (IS_DEAD_ARG(1)) { 5606 temp_dead(s, itsl); 5607 } 5608 if (IS_DEAD_ARG(2)) { 5609 temp_dead(s, itsh); 5610 } 5611 if (NEED_SYNC_ARG(0)) { 5612 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5613 } else if (IS_DEAD_ARG(0)) { 5614 temp_dead(s, ots); 5615 } 5616 return true; 5617 } 5618 5619 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5620 TCGRegSet allocated_regs) 5621 { 5622 if (ts->val_type == TEMP_VAL_REG) { 5623 if (ts->reg != reg) { 5624 tcg_reg_free(s, reg, allocated_regs); 5625 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5626 /* 5627 * Cross register class move not supported. Sync the 5628 * temp back to its slot and load from there. 5629 */ 5630 temp_sync(s, ts, allocated_regs, 0, 0); 5631 tcg_out_ld(s, ts->type, reg, 5632 ts->mem_base->reg, ts->mem_offset); 5633 } 5634 } 5635 } else { 5636 TCGRegSet arg_set = 0; 5637 5638 tcg_reg_free(s, reg, allocated_regs); 5639 tcg_regset_set_reg(arg_set, reg); 5640 temp_load(s, ts, arg_set, allocated_regs, 0); 5641 } 5642 } 5643 5644 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5645 TCGRegSet allocated_regs) 5646 { 5647 /* 5648 * When the destination is on the stack, load up the temp and store. 5649 * If there are many call-saved registers, the temp might live to 5650 * see another use; otherwise it'll be discarded. 
5651 */ 5652 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5653 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5654 arg_slot_stk_ofs(arg_slot)); 5655 } 5656 5657 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5658 TCGTemp *ts, TCGRegSet *allocated_regs) 5659 { 5660 if (arg_slot_reg_p(l->arg_slot)) { 5661 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5662 load_arg_reg(s, reg, ts, *allocated_regs); 5663 tcg_regset_set_reg(*allocated_regs, reg); 5664 } else { 5665 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5666 } 5667 } 5668 5669 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5670 intptr_t ref_off, TCGRegSet *allocated_regs) 5671 { 5672 TCGReg reg; 5673 5674 if (arg_slot_reg_p(arg_slot)) { 5675 reg = tcg_target_call_iarg_regs[arg_slot]; 5676 tcg_reg_free(s, reg, *allocated_regs); 5677 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5678 tcg_regset_set_reg(*allocated_regs, reg); 5679 } else { 5680 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5681 *allocated_regs, 0, false); 5682 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5683 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5684 arg_slot_stk_ofs(arg_slot)); 5685 } 5686 } 5687 5688 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5689 { 5690 const int nb_oargs = TCGOP_CALLO(op); 5691 const int nb_iargs = TCGOP_CALLI(op); 5692 const TCGLifeData arg_life = op->life; 5693 const TCGHelperInfo *info = tcg_call_info(op); 5694 TCGRegSet allocated_regs = s->reserved_regs; 5695 int i; 5696 5697 /* 5698 * Move inputs into place in reverse order, 5699 * so that we place stacked arguments first. 5700 */ 5701 for (i = nb_iargs - 1; i >= 0; --i) { 5702 const TCGCallArgumentLoc *loc = &info->in[i]; 5703 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5704 5705 switch (loc->kind) { 5706 case TCG_CALL_ARG_NORMAL: 5707 case TCG_CALL_ARG_EXTEND_U: 5708 case TCG_CALL_ARG_EXTEND_S: 5709 load_arg_normal(s, loc, ts, &allocated_regs); 5710 break; 5711 case TCG_CALL_ARG_BY_REF: 5712 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5713 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5714 arg_slot_stk_ofs(loc->ref_slot), 5715 &allocated_regs); 5716 break; 5717 case TCG_CALL_ARG_BY_REF_N: 5718 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5719 break; 5720 default: 5721 g_assert_not_reached(); 5722 } 5723 } 5724 5725 /* Mark dead temporaries and free the associated registers. */ 5726 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5727 if (IS_DEAD_ARG(i)) { 5728 temp_dead(s, arg_temp(op->args[i])); 5729 } 5730 } 5731 5732 /* Clobber call registers. */ 5733 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5734 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5735 tcg_reg_free(s, i, allocated_regs); 5736 } 5737 } 5738 5739 /* 5740 * Save globals if they might be written by the helper, 5741 * sync them if they might be read. 5742 */ 5743 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5744 /* Nothing to do */ 5745 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5746 sync_globals(s, allocated_regs); 5747 } else { 5748 save_globals(s, allocated_regs); 5749 } 5750 5751 /* 5752 * If the ABI passes a pointer to the returned struct as the first 5753 * argument, load that now. Pass a pointer to the output home slot. 
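     * The home slot is allocated in the frame now if it does not already
     * exist.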
5754 */ 5755 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5756 TCGTemp *ts = arg_temp(op->args[0]); 5757 5758 if (!ts->mem_allocated) { 5759 temp_allocate_frame(s, ts); 5760 } 5761 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5762 } 5763 5764 tcg_out_call(s, tcg_call_func(op), info); 5765 5766 /* Assign output registers and emit moves if needed. */ 5767 switch (info->out_kind) { 5768 case TCG_CALL_RET_NORMAL: 5769 for (i = 0; i < nb_oargs; i++) { 5770 TCGTemp *ts = arg_temp(op->args[i]); 5771 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5772 5773 /* ENV should not be modified. */ 5774 tcg_debug_assert(!temp_readonly(ts)); 5775 5776 set_temp_val_reg(s, ts, reg); 5777 ts->mem_coherent = 0; 5778 } 5779 break; 5780 5781 case TCG_CALL_RET_BY_VEC: 5782 { 5783 TCGTemp *ts = arg_temp(op->args[0]); 5784 5785 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5786 tcg_debug_assert(ts->temp_subindex == 0); 5787 if (!ts->mem_allocated) { 5788 temp_allocate_frame(s, ts); 5789 } 5790 tcg_out_st(s, TCG_TYPE_V128, 5791 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5792 ts->mem_base->reg, ts->mem_offset); 5793 } 5794 /* fall through to mark all parts in memory */ 5795 5796 case TCG_CALL_RET_BY_REF: 5797 /* The callee has performed a write through the reference. */ 5798 for (i = 0; i < nb_oargs; i++) { 5799 TCGTemp *ts = arg_temp(op->args[i]); 5800 ts->val_type = TEMP_VAL_MEM; 5801 } 5802 break; 5803 5804 default: 5805 g_assert_not_reached(); 5806 } 5807 5808 /* Flush or discard output registers as needed. */ 5809 for (i = 0; i < nb_oargs; i++) { 5810 TCGTemp *ts = arg_temp(op->args[i]); 5811 if (NEED_SYNC_ARG(i)) { 5812 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5813 } else if (IS_DEAD_ARG(i)) { 5814 temp_dead(s, ts); 5815 } 5816 } 5817 } 5818 5819 /** 5820 * atom_and_align_for_opc: 5821 * @s: tcg context 5822 * @opc: memory operation code 5823 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5824 * @allow_two_ops: true if we are prepared to issue two operations 5825 * 5826 * Return the alignment and atomicity to use for the inline fast path 5827 * for the given memory operation. The alignment may be larger than 5828 * that specified in @opc, and the correct alignment will be diagnosed 5829 * by the slow path helper. 5830 * 5831 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5832 * and issue two loads or stores for subalignment. 5833 */ 5834 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5835 MemOp host_atom, bool allow_two_ops) 5836 { 5837 MemOp align = memop_alignment_bits(opc); 5838 MemOp size = opc & MO_SIZE; 5839 MemOp half = size ? size - 1 : 0; 5840 MemOp atom = opc & MO_ATOM_MASK; 5841 MemOp atmax; 5842 5843 switch (atom) { 5844 case MO_ATOM_NONE: 5845 /* The operation requires no specific atomicity. */ 5846 atmax = MO_8; 5847 break; 5848 5849 case MO_ATOM_IFALIGN: 5850 atmax = size; 5851 break; 5852 5853 case MO_ATOM_IFALIGN_PAIR: 5854 atmax = half; 5855 break; 5856 5857 case MO_ATOM_WITHIN16: 5858 atmax = size; 5859 if (size == MO_128) { 5860 /* Misalignment implies !within16, and therefore no atomicity. */ 5861 } else if (host_atom != MO_ATOM_WITHIN16) { 5862 /* The host does not implement within16, so require alignment. */ 5863 align = MAX(align, size); 5864 } 5865 break; 5866 5867 case MO_ATOM_WITHIN16_PAIR: 5868 atmax = size; 5869 /* 5870 * Misalignment implies !within16, and therefore half atomicity. 
5871 * Any host prepared for two operations can implement this with 5872 * half alignment. 5873 */ 5874 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5875 align = MAX(align, half); 5876 } 5877 break; 5878 5879 case MO_ATOM_SUBALIGN: 5880 atmax = size; 5881 if (host_atom != MO_ATOM_SUBALIGN) { 5882 /* If unaligned but not odd, there are subobjects up to half. */ 5883 if (allow_two_ops) { 5884 align = MAX(align, half); 5885 } else { 5886 align = MAX(align, size); 5887 } 5888 } 5889 break; 5890 5891 default: 5892 g_assert_not_reached(); 5893 } 5894 5895 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5896 } 5897 5898 /* 5899 * Similarly for qemu_ld/st slow path helpers. 5900 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5901 * using only the provided backend tcg_out_* functions. 5902 */ 5903 5904 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5905 { 5906 int ofs = arg_slot_stk_ofs(slot); 5907 5908 /* 5909 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5910 * require extension to uint64_t, adjust the address for uint32_t. 5911 */ 5912 if (HOST_BIG_ENDIAN && 5913 TCG_TARGET_REG_BITS == 64 && 5914 type == TCG_TYPE_I32) { 5915 ofs += 4; 5916 } 5917 return ofs; 5918 } 5919 5920 static void tcg_out_helper_load_slots(TCGContext *s, 5921 unsigned nmov, TCGMovExtend *mov, 5922 const TCGLdstHelperParam *parm) 5923 { 5924 unsigned i; 5925 TCGReg dst3; 5926 5927 /* 5928 * Start from the end, storing to the stack first. 5929 * This frees those registers, so we need not consider overlap. 5930 */ 5931 for (i = nmov; i-- > 0; ) { 5932 unsigned slot = mov[i].dst; 5933 5934 if (arg_slot_reg_p(slot)) { 5935 goto found_reg; 5936 } 5937 5938 TCGReg src = mov[i].src; 5939 TCGType dst_type = mov[i].dst_type; 5940 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5941 5942 /* The argument is going onto the stack; extend into scratch. */ 5943 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5944 tcg_debug_assert(parm->ntmp != 0); 5945 mov[i].dst = src = parm->tmp[0]; 5946 tcg_out_movext1(s, &mov[i]); 5947 } 5948 5949 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5950 tcg_out_helper_stk_ofs(dst_type, slot)); 5951 } 5952 return; 5953 5954 found_reg: 5955 /* 5956 * The remaining arguments are in registers. 5957 * Convert slot numbers to argument registers. 5958 */ 5959 nmov = i + 1; 5960 for (i = 0; i < nmov; ++i) { 5961 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5962 } 5963 5964 switch (nmov) { 5965 case 4: 5966 /* The backend must have provided enough temps for the worst case. */ 5967 tcg_debug_assert(parm->ntmp >= 2); 5968 5969 dst3 = mov[3].dst; 5970 for (unsigned j = 0; j < 3; ++j) { 5971 if (dst3 == mov[j].src) { 5972 /* 5973 * Conflict. Copy the source to a temporary, perform the 5974 * remaining moves, then the extension from our scratch 5975 * on the way out. 5976 */ 5977 TCGReg scratch = parm->tmp[1]; 5978 5979 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5980 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5981 tcg_out_movext1_new_src(s, &mov[3], scratch); 5982 break; 5983 } 5984 } 5985 5986 /* No conflicts: perform this move and continue. */ 5987 tcg_out_movext1(s, &mov[3]); 5988 /* fall through */ 5989 5990 case 3: 5991 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5992 parm->ntmp ? parm->tmp[0] : -1); 5993 break; 5994 case 2: 5995 tcg_out_movext2(s, mov, mov + 1, 5996 parm->ntmp ? 
parm->tmp[0] : -1); 5997 break; 5998 case 1: 5999 tcg_out_movext1(s, mov); 6000 break; 6001 default: 6002 g_assert_not_reached(); 6003 } 6004 } 6005 6006 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 6007 TCGType type, tcg_target_long imm, 6008 const TCGLdstHelperParam *parm) 6009 { 6010 if (arg_slot_reg_p(slot)) { 6011 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 6012 } else { 6013 int ofs = tcg_out_helper_stk_ofs(type, slot); 6014 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 6015 tcg_debug_assert(parm->ntmp != 0); 6016 tcg_out_movi(s, type, parm->tmp[0], imm); 6017 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 6018 } 6019 } 6020 } 6021 6022 static void tcg_out_helper_load_common_args(TCGContext *s, 6023 const TCGLabelQemuLdst *ldst, 6024 const TCGLdstHelperParam *parm, 6025 const TCGHelperInfo *info, 6026 unsigned next_arg) 6027 { 6028 TCGMovExtend ptr_mov = { 6029 .dst_type = TCG_TYPE_PTR, 6030 .src_type = TCG_TYPE_PTR, 6031 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6032 }; 6033 const TCGCallArgumentLoc *loc = &info->in[0]; 6034 TCGType type; 6035 unsigned slot; 6036 tcg_target_ulong imm; 6037 6038 /* 6039 * Handle env, which is always first. 6040 */ 6041 ptr_mov.dst = loc->arg_slot; 6042 ptr_mov.src = TCG_AREG0; 6043 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6044 6045 /* 6046 * Handle oi. 6047 */ 6048 imm = ldst->oi; 6049 loc = &info->in[next_arg]; 6050 type = TCG_TYPE_I32; 6051 switch (loc->kind) { 6052 case TCG_CALL_ARG_NORMAL: 6053 break; 6054 case TCG_CALL_ARG_EXTEND_U: 6055 case TCG_CALL_ARG_EXTEND_S: 6056 /* No extension required for MemOpIdx. */ 6057 tcg_debug_assert(imm <= INT32_MAX); 6058 type = TCG_TYPE_REG; 6059 break; 6060 default: 6061 g_assert_not_reached(); 6062 } 6063 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6064 next_arg++; 6065 6066 /* 6067 * Handle ra. 6068 */ 6069 loc = &info->in[next_arg]; 6070 slot = loc->arg_slot; 6071 if (parm->ra_gen) { 6072 int arg_reg = -1; 6073 TCGReg ra_reg; 6074 6075 if (arg_slot_reg_p(slot)) { 6076 arg_reg = tcg_target_call_iarg_regs[slot]; 6077 } 6078 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6079 6080 ptr_mov.dst = slot; 6081 ptr_mov.src = ra_reg; 6082 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6083 } else { 6084 imm = (uintptr_t)ldst->raddr; 6085 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6086 } 6087 } 6088 6089 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6090 const TCGCallArgumentLoc *loc, 6091 TCGType dst_type, TCGType src_type, 6092 TCGReg lo, TCGReg hi) 6093 { 6094 MemOp reg_mo; 6095 6096 if (dst_type <= TCG_TYPE_REG) { 6097 MemOp src_ext; 6098 6099 switch (loc->kind) { 6100 case TCG_CALL_ARG_NORMAL: 6101 src_ext = src_type == TCG_TYPE_I32 ? 
MO_32 : MO_64; 6102 break; 6103 case TCG_CALL_ARG_EXTEND_U: 6104 dst_type = TCG_TYPE_REG; 6105 src_ext = MO_UL; 6106 break; 6107 case TCG_CALL_ARG_EXTEND_S: 6108 dst_type = TCG_TYPE_REG; 6109 src_ext = MO_SL; 6110 break; 6111 default: 6112 g_assert_not_reached(); 6113 } 6114 6115 mov[0].dst = loc->arg_slot; 6116 mov[0].dst_type = dst_type; 6117 mov[0].src = lo; 6118 mov[0].src_type = src_type; 6119 mov[0].src_ext = src_ext; 6120 return 1; 6121 } 6122 6123 if (TCG_TARGET_REG_BITS == 32) { 6124 assert(dst_type == TCG_TYPE_I64); 6125 reg_mo = MO_32; 6126 } else { 6127 assert(dst_type == TCG_TYPE_I128); 6128 reg_mo = MO_64; 6129 } 6130 6131 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6132 mov[0].src = lo; 6133 mov[0].dst_type = TCG_TYPE_REG; 6134 mov[0].src_type = TCG_TYPE_REG; 6135 mov[0].src_ext = reg_mo; 6136 6137 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6138 mov[1].src = hi; 6139 mov[1].dst_type = TCG_TYPE_REG; 6140 mov[1].src_type = TCG_TYPE_REG; 6141 mov[1].src_ext = reg_mo; 6142 6143 return 2; 6144 } 6145 6146 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6147 const TCGLdstHelperParam *parm) 6148 { 6149 const TCGHelperInfo *info; 6150 const TCGCallArgumentLoc *loc; 6151 TCGMovExtend mov[2]; 6152 unsigned next_arg, nmov; 6153 MemOp mop = get_memop(ldst->oi); 6154 6155 switch (mop & MO_SIZE) { 6156 case MO_8: 6157 case MO_16: 6158 case MO_32: 6159 info = &info_helper_ld32_mmu; 6160 break; 6161 case MO_64: 6162 info = &info_helper_ld64_mmu; 6163 break; 6164 case MO_128: 6165 info = &info_helper_ld128_mmu; 6166 break; 6167 default: 6168 g_assert_not_reached(); 6169 } 6170 6171 /* Defer env argument. */ 6172 next_arg = 1; 6173 6174 loc = &info->in[next_arg]; 6175 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6176 /* 6177 * 32-bit host with 32-bit guest: zero-extend the guest address 6178 * to 64-bits for the helper by storing the low part, then 6179 * load a zero for the high part. 6180 */ 6181 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6182 TCG_TYPE_I32, TCG_TYPE_I32, 6183 ldst->addr_reg, -1); 6184 tcg_out_helper_load_slots(s, 1, mov, parm); 6185 6186 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6187 TCG_TYPE_I32, 0, parm); 6188 next_arg += 2; 6189 } else { 6190 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6191 ldst->addr_reg, -1); 6192 tcg_out_helper_load_slots(s, nmov, mov, parm); 6193 next_arg += nmov; 6194 } 6195 6196 switch (info->out_kind) { 6197 case TCG_CALL_RET_NORMAL: 6198 case TCG_CALL_RET_BY_VEC: 6199 break; 6200 case TCG_CALL_RET_BY_REF: 6201 /* 6202 * The return reference is in the first argument slot. 6203 * We need memory in which to return: re-use the top of stack. 
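 * tcg_out_ld_helper_ret() below will read the 128-bit result back from this same slot.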
6204 */ 6205 { 6206 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6207 6208 if (arg_slot_reg_p(0)) { 6209 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6210 TCG_REG_CALL_STACK, ofs_slot0); 6211 } else { 6212 tcg_debug_assert(parm->ntmp != 0); 6213 tcg_out_addi_ptr(s, parm->tmp[0], 6214 TCG_REG_CALL_STACK, ofs_slot0); 6215 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6216 TCG_REG_CALL_STACK, ofs_slot0); 6217 } 6218 } 6219 break; 6220 default: 6221 g_assert_not_reached(); 6222 } 6223 6224 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6225 } 6226 6227 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6228 bool load_sign, 6229 const TCGLdstHelperParam *parm) 6230 { 6231 MemOp mop = get_memop(ldst->oi); 6232 TCGMovExtend mov[2]; 6233 int ofs_slot0; 6234 6235 switch (ldst->type) { 6236 case TCG_TYPE_I64: 6237 if (TCG_TARGET_REG_BITS == 32) { 6238 break; 6239 } 6240 /* fall through */ 6241 6242 case TCG_TYPE_I32: 6243 mov[0].dst = ldst->datalo_reg; 6244 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6245 mov[0].dst_type = ldst->type; 6246 mov[0].src_type = TCG_TYPE_REG; 6247 6248 /* 6249 * If load_sign, then we allowed the helper to perform the 6250 * appropriate sign extension to tcg_target_ulong, and all 6251 * we need now is a plain move. 6252 * 6253 * If they do not, then we expect the relevant extension 6254 * instruction to be no more expensive than a move, and 6255 * we thus save the icache etc by only using one of two 6256 * helper functions. 6257 */ 6258 if (load_sign || !(mop & MO_SIGN)) { 6259 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6260 mov[0].src_ext = MO_32; 6261 } else { 6262 mov[0].src_ext = MO_64; 6263 } 6264 } else { 6265 mov[0].src_ext = mop & MO_SSIZE; 6266 } 6267 tcg_out_movext1(s, mov); 6268 return; 6269 6270 case TCG_TYPE_I128: 6271 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6272 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6273 switch (TCG_TARGET_CALL_RET_I128) { 6274 case TCG_CALL_RET_NORMAL: 6275 break; 6276 case TCG_CALL_RET_BY_VEC: 6277 tcg_out_st(s, TCG_TYPE_V128, 6278 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6279 TCG_REG_CALL_STACK, ofs_slot0); 6280 /* fall through */ 6281 case TCG_CALL_RET_BY_REF: 6282 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6283 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6284 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6285 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6286 return; 6287 default: 6288 g_assert_not_reached(); 6289 } 6290 break; 6291 6292 default: 6293 g_assert_not_reached(); 6294 } 6295 6296 mov[0].dst = ldst->datalo_reg; 6297 mov[0].src = 6298 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6299 mov[0].dst_type = TCG_TYPE_REG; 6300 mov[0].src_type = TCG_TYPE_REG; 6301 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6302 6303 mov[1].dst = ldst->datahi_reg; 6304 mov[1].src = 6305 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6306 mov[1].dst_type = TCG_TYPE_REG; 6307 mov[1].src_type = TCG_TYPE_REG; 6308 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6309 6310 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6311 } 6312 6313 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6314 const TCGLdstHelperParam *parm) 6315 { 6316 const TCGHelperInfo *info; 6317 const TCGCallArgumentLoc *loc; 6318 TCGMovExtend mov[4]; 6319 TCGType data_type; 6320 unsigned next_arg, nmov, n; 6321 MemOp mop = get_memop(ldst->oi); 6322 6323 switch (mop & MO_SIZE) { 6324 case MO_8: 6325 case MO_16: 6326 case MO_32: 6327 info = &info_helper_st32_mmu; 6328 data_type = TCG_TYPE_I32; 6329 break; 6330 case MO_64: 6331 info = &info_helper_st64_mmu; 6332 data_type = TCG_TYPE_I64; 6333 break; 6334 case MO_128: 6335 info = &info_helper_st128_mmu; 6336 data_type = TCG_TYPE_I128; 6337 break; 6338 default: 6339 g_assert_not_reached(); 6340 } 6341 6342 /* Defer env argument. */ 6343 next_arg = 1; 6344 nmov = 0; 6345 6346 /* Handle addr argument. */ 6347 loc = &info->in[next_arg]; 6348 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6349 if (TCG_TARGET_REG_BITS == 32) { 6350 /* 6351 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6352 * to 64-bits for the helper by storing the low part. Later, 6353 * after we have processed the register inputs, we will load a 6354 * zero for the high part. 6355 */ 6356 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6357 TCG_TYPE_I32, TCG_TYPE_I32, 6358 ldst->addr_reg, -1); 6359 next_arg += 2; 6360 nmov += 1; 6361 } else { 6362 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6363 ldst->addr_reg, -1); 6364 next_arg += n; 6365 nmov += n; 6366 } 6367 6368 /* Handle data argument. */ 6369 loc = &info->in[next_arg]; 6370 switch (loc->kind) { 6371 case TCG_CALL_ARG_NORMAL: 6372 case TCG_CALL_ARG_EXTEND_U: 6373 case TCG_CALL_ARG_EXTEND_S: 6374 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6375 ldst->datalo_reg, ldst->datahi_reg); 6376 next_arg += n; 6377 nmov += n; 6378 tcg_out_helper_load_slots(s, nmov, mov, parm); 6379 break; 6380 6381 case TCG_CALL_ARG_BY_REF: 6382 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6383 tcg_debug_assert(data_type == TCG_TYPE_I128); 6384 tcg_out_st(s, TCG_TYPE_I64, 6385 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6386 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6387 tcg_out_st(s, TCG_TYPE_I64, 6388 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6389 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6390 6391 tcg_out_helper_load_slots(s, nmov, mov, parm); 6392 6393 if (arg_slot_reg_p(loc->arg_slot)) { 6394 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6395 TCG_REG_CALL_STACK, 6396 arg_slot_stk_ofs(loc->ref_slot)); 6397 } else { 6398 tcg_debug_assert(parm->ntmp != 0); 6399 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6400 arg_slot_stk_ofs(loc->ref_slot)); 6401 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6402 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6403 } 6404 next_arg += 2; 6405 break; 6406 6407 default: 6408 g_assert_not_reached(); 6409 } 6410 6411 if (TCG_TARGET_REG_BITS == 32) { 6412 /* Zero extend the address by loading a zero for the high part. 
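The 64-bit address occupies two argument slots; the low half was handled with the register inputs above, and in[1 + !HOST_BIG_ENDIAN] is the slot for the high half.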
*/ 6413 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6414 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6415 } 6416 6417 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6418 } 6419 6420 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6421 { 6422 int i, start_words, num_insns; 6423 TCGOp *op; 6424 6425 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6426 && qemu_log_in_addr_range(pc_start))) { 6427 FILE *logfile = qemu_log_trylock(); 6428 if (logfile) { 6429 fprintf(logfile, "OP:\n"); 6430 tcg_dump_ops(s, logfile, false); 6431 fprintf(logfile, "\n"); 6432 qemu_log_unlock(logfile); 6433 } 6434 } 6435 6436 #ifdef CONFIG_DEBUG_TCG 6437 /* Ensure all labels referenced have been emitted. */ 6438 { 6439 TCGLabel *l; 6440 bool error = false; 6441 6442 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6443 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6444 qemu_log_mask(CPU_LOG_TB_OP, 6445 "$L%d referenced but not present.\n", l->id); 6446 error = true; 6447 } 6448 } 6449 assert(!error); 6450 } 6451 #endif 6452 6453 /* Do not reuse any EBB that may be allocated within the TB. */ 6454 tcg_temp_ebb_reset_freed(s); 6455 6456 tcg_optimize(s); 6457 6458 reachable_code_pass(s); 6459 liveness_pass_0(s); 6460 liveness_pass_1(s); 6461 6462 if (s->nb_indirects > 0) { 6463 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6464 && qemu_log_in_addr_range(pc_start))) { 6465 FILE *logfile = qemu_log_trylock(); 6466 if (logfile) { 6467 fprintf(logfile, "OP before indirect lowering:\n"); 6468 tcg_dump_ops(s, logfile, false); 6469 fprintf(logfile, "\n"); 6470 qemu_log_unlock(logfile); 6471 } 6472 } 6473 6474 /* Replace indirect temps with direct temps. */ 6475 if (liveness_pass_2(s)) { 6476 /* If changes were made, re-run liveness. */ 6477 liveness_pass_1(s); 6478 } 6479 } 6480 6481 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6482 && qemu_log_in_addr_range(pc_start))) { 6483 FILE *logfile = qemu_log_trylock(); 6484 if (logfile) { 6485 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6486 tcg_dump_ops(s, logfile, true); 6487 fprintf(logfile, "\n"); 6488 qemu_log_unlock(logfile); 6489 } 6490 } 6491 6492 /* Initialize goto_tb jump offsets. */ 6493 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6494 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6495 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6496 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6497 6498 tcg_reg_alloc_start(s); 6499 6500 /* 6501 * Reset the buffer pointers when restarting after overflow. 6502 * TODO: Move this into translate-all.c with the rest of the 6503 * buffer management. Having only this done here is confusing. 6504 */ 6505 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6506 s->code_ptr = s->code_buf; 6507 s->data_gen_ptr = NULL; 6508 6509 QSIMPLEQ_INIT(&s->ldst_labels); 6510 s->pool_labels = NULL; 6511 6512 start_words = s->insn_start_words; 6513 s->gen_insn_data = 6514 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6515 6516 tcg_out_tb_start(s); 6517 6518 num_insns = -1; 6519 QTAILQ_FOREACH(op, &s->ops, link) { 6520 TCGOpcode opc = op->opc; 6521 6522 switch (opc) { 6523 case INDEX_op_mov: 6524 case INDEX_op_mov_vec: 6525 tcg_reg_alloc_mov(s, op); 6526 break; 6527 case INDEX_op_dup_vec: 6528 tcg_reg_alloc_dup(s, op); 6529 break; 6530 case INDEX_op_insn_start: 6531 if (num_insns >= 0) { 6532 size_t off = tcg_current_code_size(s); 6533 s->gen_insn_end_off[num_insns] = off; 6534 /* Assert that we do not overflow our stored offset. 
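gen_insn_end_off[] entries are 16 bits wide; the TB-size check against UINT16_MAX below guards this even when assertions are disabled.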
*/ 6535 assert(s->gen_insn_end_off[num_insns] == off); 6536 } 6537 num_insns++; 6538 for (i = 0; i < start_words; ++i) { 6539 s->gen_insn_data[num_insns * start_words + i] = 6540 tcg_get_insn_start_param(op, i); 6541 } 6542 break; 6543 case INDEX_op_discard: 6544 temp_dead(s, arg_temp(op->args[0])); 6545 break; 6546 case INDEX_op_set_label: 6547 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6548 tcg_out_label(s, arg_label(op->args[0])); 6549 break; 6550 case INDEX_op_call: 6551 tcg_reg_alloc_call(s, op); 6552 break; 6553 case INDEX_op_exit_tb: 6554 tcg_out_exit_tb(s, op->args[0]); 6555 break; 6556 case INDEX_op_goto_tb: 6557 tcg_out_goto_tb(s, op->args[0]); 6558 break; 6559 case INDEX_op_dup2_vec: 6560 if (tcg_reg_alloc_dup2(s, op)) { 6561 break; 6562 } 6563 /* fall through */ 6564 default: 6565 /* Sanity check that we've not introduced any unhandled opcodes. */ 6566 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6567 TCGOP_FLAGS(op))); 6568 /* Note: in order to speed up the code, it would be much 6569 faster to have specialized register allocator functions for 6570 some common argument patterns */ 6571 tcg_reg_alloc_op(s, op); 6572 break; 6573 } 6574 /* Test for (pending) buffer overflow. The assumption is that any 6575 one operation beginning below the high water mark cannot overrun 6576 the buffer completely. Thus we can test for overflow after 6577 generating code without having to check during generation. */ 6578 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6579 return -1; 6580 } 6581 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6582 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6583 return -2; 6584 } 6585 } 6586 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6587 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6588 6589 /* Generate TB finalization at the end of block */ 6590 i = tcg_out_ldst_finalize(s); 6591 if (i < 0) { 6592 return i; 6593 } 6594 i = tcg_out_pool_finalize(s); 6595 if (i < 0) { 6596 return i; 6597 } 6598 if (!tcg_resolve_relocs(s)) { 6599 return -2; 6600 } 6601 6602 #ifndef CONFIG_TCG_INTERPRETER 6603 /* flush instruction cache */ 6604 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6605 (uintptr_t)s->code_buf, 6606 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6607 #endif 6608 6609 return tcg_current_code_size(s); 6610 } 6611 6612 #ifdef ELF_HOST_MACHINE 6613 /* In order to use this feature, the backend needs to do three things: 6614 6615 (1) Define ELF_HOST_MACHINE to indicate both what value to 6616 put into the ELF image and to indicate support for the feature. 6617 6618 (2) Define tcg_register_jit. This should create a buffer containing 6619 the contents of a .debug_frame section that describes the post- 6620 prologue unwind info for the tcg machine. 6621 6622 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6623 */ 6624 6625 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. 
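See the "JIT Compilation Interface" chapter of the GDB manual.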
*/ 6626 typedef enum { 6627 JIT_NOACTION = 0, 6628 JIT_REGISTER_FN, 6629 JIT_UNREGISTER_FN 6630 } jit_actions_t; 6631 6632 struct jit_code_entry { 6633 struct jit_code_entry *next_entry; 6634 struct jit_code_entry *prev_entry; 6635 const void *symfile_addr; 6636 uint64_t symfile_size; 6637 }; 6638 6639 struct jit_descriptor { 6640 uint32_t version; 6641 uint32_t action_flag; 6642 struct jit_code_entry *relevant_entry; 6643 struct jit_code_entry *first_entry; 6644 }; 6645 6646 void __jit_debug_register_code(void) __attribute__((noinline)); 6647 void __jit_debug_register_code(void) 6648 { 6649 asm(""); 6650 } 6651 6652 /* Must statically initialize the version, because GDB may check 6653 the version before we can set it. */ 6654 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6655 6656 /* End GDB interface. */ 6657 6658 static int find_string(const char *strtab, const char *str) 6659 { 6660 const char *p = strtab + 1; 6661 6662 while (1) { 6663 if (strcmp(p, str) == 0) { 6664 return p - strtab; 6665 } 6666 p += strlen(p) + 1; 6667 } 6668 } 6669 6670 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6671 const void *debug_frame, 6672 size_t debug_frame_size) 6673 { 6674 struct __attribute__((packed)) DebugInfo { 6675 uint32_t len; 6676 uint16_t version; 6677 uint32_t abbrev; 6678 uint8_t ptr_size; 6679 uint8_t cu_die; 6680 uint16_t cu_lang; 6681 uintptr_t cu_low_pc; 6682 uintptr_t cu_high_pc; 6683 uint8_t fn_die; 6684 char fn_name[16]; 6685 uintptr_t fn_low_pc; 6686 uintptr_t fn_high_pc; 6687 uint8_t cu_eoc; 6688 }; 6689 6690 struct ElfImage { 6691 ElfW(Ehdr) ehdr; 6692 ElfW(Phdr) phdr; 6693 ElfW(Shdr) shdr[7]; 6694 ElfW(Sym) sym[2]; 6695 struct DebugInfo di; 6696 uint8_t da[24]; 6697 char str[80]; 6698 }; 6699 6700 struct ElfImage *img; 6701 6702 static const struct ElfImage img_template = { 6703 .ehdr = { 6704 .e_ident[EI_MAG0] = ELFMAG0, 6705 .e_ident[EI_MAG1] = ELFMAG1, 6706 .e_ident[EI_MAG2] = ELFMAG2, 6707 .e_ident[EI_MAG3] = ELFMAG3, 6708 .e_ident[EI_CLASS] = ELF_CLASS, 6709 .e_ident[EI_DATA] = ELF_DATA, 6710 .e_ident[EI_VERSION] = EV_CURRENT, 6711 .e_type = ET_EXEC, 6712 .e_machine = ELF_HOST_MACHINE, 6713 .e_version = EV_CURRENT, 6714 .e_phoff = offsetof(struct ElfImage, phdr), 6715 .e_shoff = offsetof(struct ElfImage, shdr), 6716 .e_ehsize = sizeof(ElfW(Shdr)), 6717 .e_phentsize = sizeof(ElfW(Phdr)), 6718 .e_phnum = 1, 6719 .e_shentsize = sizeof(ElfW(Shdr)), 6720 .e_shnum = ARRAY_SIZE(img->shdr), 6721 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6722 #ifdef ELF_HOST_FLAGS 6723 .e_flags = ELF_HOST_FLAGS, 6724 #endif 6725 #ifdef ELF_OSABI 6726 .e_ident[EI_OSABI] = ELF_OSABI, 6727 #endif 6728 }, 6729 .phdr = { 6730 .p_type = PT_LOAD, 6731 .p_flags = PF_X, 6732 }, 6733 .shdr = { 6734 [0] = { .sh_type = SHT_NULL }, 6735 /* Trick: The contents of code_gen_buffer are not present in 6736 this fake ELF file; that got allocated elsewhere. Therefore 6737 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6738 will not look for contents. We can record any address. 
*/ 6739 [1] = { /* .text */ 6740 .sh_type = SHT_NOBITS, 6741 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6742 }, 6743 [2] = { /* .debug_info */ 6744 .sh_type = SHT_PROGBITS, 6745 .sh_offset = offsetof(struct ElfImage, di), 6746 .sh_size = sizeof(struct DebugInfo), 6747 }, 6748 [3] = { /* .debug_abbrev */ 6749 .sh_type = SHT_PROGBITS, 6750 .sh_offset = offsetof(struct ElfImage, da), 6751 .sh_size = sizeof(img->da), 6752 }, 6753 [4] = { /* .debug_frame */ 6754 .sh_type = SHT_PROGBITS, 6755 .sh_offset = sizeof(struct ElfImage), 6756 }, 6757 [5] = { /* .symtab */ 6758 .sh_type = SHT_SYMTAB, 6759 .sh_offset = offsetof(struct ElfImage, sym), 6760 .sh_size = sizeof(img->sym), 6761 .sh_info = 1, 6762 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6763 .sh_entsize = sizeof(ElfW(Sym)), 6764 }, 6765 [6] = { /* .strtab */ 6766 .sh_type = SHT_STRTAB, 6767 .sh_offset = offsetof(struct ElfImage, str), 6768 .sh_size = sizeof(img->str), 6769 } 6770 }, 6771 .sym = { 6772 [1] = { /* code_gen_buffer */ 6773 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6774 .st_shndx = 1, 6775 } 6776 }, 6777 .di = { 6778 .len = sizeof(struct DebugInfo) - 4, 6779 .version = 2, 6780 .ptr_size = sizeof(void *), 6781 .cu_die = 1, 6782 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6783 .fn_die = 2, 6784 .fn_name = "code_gen_buffer" 6785 }, 6786 .da = { 6787 1, /* abbrev number (the cu) */ 6788 0x11, 1, /* DW_TAG_compile_unit, has children */ 6789 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6790 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6791 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6792 0, 0, /* end of abbrev */ 6793 2, /* abbrev number (the fn) */ 6794 0x2e, 0, /* DW_TAG_subprogram, no children */ 6795 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6796 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6797 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6798 0, 0, /* end of abbrev */ 6799 0 /* no more abbrev */ 6800 }, 6801 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6802 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 6803 }; 6804 6805 /* We only need a single jit entry; statically allocate it. */ 6806 static struct jit_code_entry one_entry; 6807 6808 uintptr_t buf = (uintptr_t)buf_ptr; 6809 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6810 DebugFrameHeader *dfh; 6811 6812 img = g_malloc(img_size); 6813 *img = img_template; 6814 6815 img->phdr.p_vaddr = buf; 6816 img->phdr.p_paddr = buf; 6817 img->phdr.p_memsz = buf_size; 6818 6819 img->shdr[1].sh_name = find_string(img->str, ".text"); 6820 img->shdr[1].sh_addr = buf; 6821 img->shdr[1].sh_size = buf_size; 6822 6823 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6824 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6825 6826 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6827 img->shdr[4].sh_size = debug_frame_size; 6828 6829 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6830 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6831 6832 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6833 img->sym[1].st_value = buf; 6834 img->sym[1].st_size = buf_size; 6835 6836 img->di.cu_low_pc = buf; 6837 img->di.cu_high_pc = buf + buf_size; 6838 img->di.fn_low_pc = buf; 6839 img->di.fn_high_pc = buf + buf_size; 6840 6841 dfh = (DebugFrameHeader *)(img + 1); 6842 memcpy(dfh, debug_frame, debug_frame_size); 6843 dfh->fde.func_start = buf; 6844 dfh->fde.func_len = buf_size; 6845 6846 #ifdef DEBUG_JIT 6847 /* Enable this block to be able to debug the ELF image file creation. 
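The image is written to <tmpdir>/qemu.jit (see g_get_tmp_dir()).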
6848 One can use readelf, objdump, or other inspection utilities. */ 6849 { 6850 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6851 FILE *f = fopen(jit, "w+b"); 6852 if (f) { 6853 if (fwrite(img, img_size, 1, f) != img_size) { 6854 /* Avoid stupid unused return value warning for fwrite. */ 6855 } 6856 fclose(f); 6857 } 6858 } 6859 #endif 6860 6861 one_entry.symfile_addr = img; 6862 one_entry.symfile_size = img_size; 6863 6864 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6865 __jit_debug_descriptor.relevant_entry = &one_entry; 6866 __jit_debug_descriptor.first_entry = &one_entry; 6867 __jit_debug_register_code(); 6868 } 6869 #else 6870 /* No support for the feature. Provide the entry point expected by exec.c, 6871 and implement the internal function we declared earlier. */ 6872 6873 static void tcg_register_jit_int(const void *buf, size_t size, 6874 const void *debug_frame, 6875 size_t debug_frame_size) 6876 { 6877 } 6878 6879 void tcg_register_jit(const void *buf, size_t buf_size) 6880 { 6881 } 6882 #endif /* ELF_HOST_MACHINE */ 6883 6884 #if !TCG_TARGET_MAYBE_vec 6885 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6886 { 6887 g_assert_not_reached(); 6888 } 6889 #endif 6890
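/*
 * Example: typical backend use of the qemu_ld/st slow-path helpers above
 * (tcg_out_ld_helper_args, tcg_out_ld_helper_ret, tcg_out_st_helper_args).
 * This is an illustrative sketch only, loosely modeled on existing
 * tcg-target.c.inc implementations; TCG_REG_TMP, qemu_ld_helpers[],
 * R_EXAMPLE_BRANCH and tcg_out_goto() stand in for whatever scratch
 * register, helper table, relocation type and branch emitter the backend
 * actually provides:
 *
 *   static const TCGLdstHelperParam ldst_helper_param = {
 *       .ntmp = 1, .tmp = { TCG_REG_TMP }
 *   };
 *
 *   static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 *   {
 *       MemOp opc = get_memop(l->oi);
 *
 *       // Retarget the fast-path branch to this slow path.
 *       if (!patch_reloc(l->label_ptr[0], R_EXAMPLE_BRANCH,
 *                        (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
 *           return false;
 *       }
 *
 *       // Marshal env, addr, oi and the return address into call slots.
 *       tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 *       // Call the matching helper_ld*_mmu routine.
 *       tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE], NULL);
 *       // Move/extend the helper's result into datalo/datahi.
 *       tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 *
 *       // Resume at the insn following the fast path.
 *       tcg_out_goto(s, l->raddr);
 *       return true;
 *   }
 *
 * The store slow path has the same shape, using tcg_out_st_helper_args()
 * and omitting the _ret step.
 */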