/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

/*
 * Select the ELF class and byte order matching the host, for the
 * in-memory ELF image handed to GDB (see tcg_register_jit_int below).
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

/* Bookkeeping for one qemu_ld/qemu_st slow path, filled in while the
   fast path is emitted and consumed by tcg_out_ldst_finalize. */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;     /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];    /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Stubs used when the backend does not support vector operations;
   none of these may be reached at runtime. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
/* In system mode there is no guest_base; any use is a build error. */
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

/* Slow-path load helpers, indexed by MemOp size and sign. */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

/* Slow-path store helpers, indexed by MemOp size. */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

/*
 * Emit raw data of various widths into the output stream, and patch
 * previously emitted data in place.  Each is conditional on the host's
 * instruction unit size, using memcpy when a value spans multiple units.
 */
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if
       (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/* Queue a relocation of kind @type at @code_ptr against label @l,
   to be resolved by patch_reloc in tcg_resolve_relocs. */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

/* Bind label @l to the current output position (read-execute view). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

/* Allocate a new, initially unbound label in the current context. */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

/* Apply every queued relocation against its label's final value.
   Returns false if any patch_reloc fails. */
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

/* Record the code offset at which goto_tb jump @which may be reset. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Record the code offset of the patchable jump instruction @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Offset of the TLB mask/table pair for mmu index @which, relative to
   the end of CPUNegativeOffsetState (hence negative). */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: emit in order. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full swap: exchange if the host can, else go via @scratch. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  Entries are ordered by descending
       nlong, then by data, so duplicates end up adjacent and can be
       merged when the pool is emitted (see tcg_out_pool_finalize). */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

/* Emit all queued qemu_ld/st slow paths.  Returns 0 on success,
   -1 on buffer overflow, -2 if a slow path could not be emitted. */
static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

/* Emit the constant pool after the generated code, deduplicating
   adjacent identical entries and patching each reference.  Returns 0 on
   success, -1 on buffer overflow, -2 if a relocation fails. */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /* Entries with equal data are adjacent (see new_pool_insert),
           so each distinct datum is copied out only once. */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

/* Paste helpers used to build constraint-set identifiers. */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum.
 */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
975 */ 976 typedef struct TCGOutOp { 977 TCGConstraintSetIndex static_constraint; 978 TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); 979 } TCGOutOp; 980 981 typedef struct TCGOutOpBinary { 982 TCGOutOp base; 983 void (*out_rrr)(TCGContext *s, TCGType type, 984 TCGReg a0, TCGReg a1, TCGReg a2); 985 void (*out_rri)(TCGContext *s, TCGType type, 986 TCGReg a0, TCGReg a1, tcg_target_long a2); 987 } TCGOutOpBinary; 988 989 typedef struct TCGOutOpDivRem { 990 TCGOutOp base; 991 void (*out_rr01r)(TCGContext *s, TCGType type, 992 TCGReg a0, TCGReg a1, TCGReg a4); 993 } TCGOutOpDivRem; 994 995 typedef struct TCGOutOpUnary { 996 TCGOutOp base; 997 void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); 998 } TCGOutOpUnary; 999 1000 typedef struct TCGOutOpSubtract { 1001 TCGOutOp base; 1002 void (*out_rrr)(TCGContext *s, TCGType type, 1003 TCGReg a0, TCGReg a1, TCGReg a2); 1004 void (*out_rir)(TCGContext *s, TCGType type, 1005 TCGReg a0, tcg_target_long a1, TCGReg a2); 1006 } TCGOutOpSubtract; 1007 1008 #include "tcg-target.c.inc" 1009 1010 #ifndef CONFIG_TCG_INTERPRETER 1011 /* Validate CPUTLBDescFast placement. */ 1012 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - 1013 sizeof(CPUNegativeOffsetState)) 1014 < MIN_TLB_MASK_TABLE_OFS); 1015 #endif 1016 1017 /* 1018 * Register V as the TCGOutOp for O. 1019 * This verifies that V is of type T, otherwise give a nice compiler error. 1020 * This prevents trivial mistakes within each arch/tcg-target.c.inc. 1021 */ 1022 #define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) 1023 1024 /* Register allocation descriptions for every TCGOpcode. 
 */
/*
 * Indexed by TCGOpcode; entries for opcodes not listed here remain NULL.
 * The outop_* objects are provided by tcg-target.c.inc (included above);
 * OUTOP's _Generic check ensures each matches the declared subclass.
 */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    /* Clone the parent context, including all registered globals. */
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /*
     * Relink mem_base.  The struct copy above left mem_base pointing
     * into tcg_init_ctx.temps[]; retarget it into our own temps[].
     */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/*
 * pool based memory allocation
 *
 * Bump allocator backing tcg_malloc(): small requests are carved out of
 * chained TCG_POOL_CHUNK_SIZE chunks (reused across tcg_pool_reset);
 * oversized requests get a dedicated "large" pool entry that is freed
 * at the next reset.  Returned memory is not zeroed.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No reusable chunk: allocate and link a fresh one. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

/*
 * Release all large allocations and rewind the chunk chain to empty.
 * Regular chunks stay allocated (pool_first) for reuse by the next TB.
 */
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
/* Map a dh_typecode_* value to the corresponding libffi type object. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

/*
 * Build (and leak, deliberately: one per helper for the process lifetime)
 * the ffi_cif call descriptor for @info, derived from its 3-bit-per-slot
 * typemask.  Only used by the TCI interpreter to make helper calls.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

/* True if argument slot @arg_slot is passed in a register, else on stack. */
static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

/* Byte offset from the stack pointer area for a stack-passed slot. */
static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

/* Running state while laying out one helper's argument list. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

/* Round arg_slot up to an even boundary (TCG_CALL_ARG_EVEN ABIs). */
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

/* Place one single-slot argument of the given @kind. */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

/* Place one argument spanning @n consecutive NORMAL slots. */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

/* Place an Int128 argument passed by reference: one pointer slot plus
   a stack copy area whose final position is fixed up in init_call_layout. */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

/*
 * Decode @info->typemask and fill in the host-ABI layout of the helper's
 * return value (nr_out/out_kind) and arguments (nr_in/in[]), honoring the
 * backend's TCG_TARGET_CALL_{RET,ARG}_* conventions.  Called once per
 * helper at startup; asserts rather than returning errors.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low typecode bit distinguishes signed from unsigned. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

/*
 * One-time process-wide TCG initialization: helper call layouts, backend
 * init, register allocation orders, context bookkeeping, and the "env"
 * fixed global.  Called only from tcg_init().
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

/* Public entry point: initialize the TCG context and the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region exhausted: try to switch to a new region, then retry. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    return tb;
}

/*
 * Emit the host prologue/epilogue at the start of the code buffer,
 * flush caches for it, optionally log the disassembly, and record
 * the region's prologue boundary.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble code, then dump the constant pool as data. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

/* Reset the per-TB translation state of @s before translating a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}

/* Allocate the next TCGTemp slot, zero-initialized; overflow raises. */
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Allocate a TEMP_GLOBAL; globals must precede all non-global temps. */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

/*
 * Create a TEMP_FIXED global that lives permanently in host register
 * @reg, which is then reserved from the allocator.
 */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

/* Record the TB frame area [start, start+size) based at register @reg. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

/*
 * Create a memory-backed global at @offset from @base.  On 32-bit hosts
 * a 64-bit global is represented as two adjacent 32-bit halves, named
 * "<name>_0"/"<name>_1" in host-endian order of mem_offset.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
    return temp_tcgv_i32(ts);
}

TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
    return temp_tcgv_i64(ts);
}

TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
    return temp_tcgv_ptr(ts);
}

/*
 * Allocate a temporary of @type and lifetime @kind (TEMP_TB or TEMP_EBB).
 * TEMP_EBB temps are recycled from the per-type free list when possible.
 * Types wider than the host register are represented as multiple
 * consecutive TCG_TYPE_REG temps sharing the same base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_i32 tcg_temp_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
}

TCGv_i32 tcg_temp_ebb_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
}

TCGv_i64 tcg_temp_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
}

TCGv_i64 tcg_temp_ebb_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
}

TCGv_ptr tcg_temp_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
}

TCGv_ptr tcg_temp_ebb_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
}

TCGv_i128 tcg_temp_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
}

TCGv_i128 tcg_temp_ebb_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
}

/* Allocate a TEMP_EBB vector temp; the host must support @type. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/*
 * Return a temp to the allocator.  Only TEMP_EBB temps are actually
 * recycled (via the free list); freeing TEMP_CONST/TEMP_TB is a no-op,
 * and TEMP_FIXED/TEMP_GLOBAL must never be freed.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(tcgv_i32_temp(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(tcgv_i64_temp(arg));
}

void tcg_temp_free_i128(TCGv_i128 arg)
{
    tcg_temp_free_internal(tcgv_i128_temp(arg));
}

void tcg_temp_free_ptr(TCGv_ptr arg)
{
    tcg_temp_free_internal(tcgv_ptr_temp(arg));
}

void tcg_temp_free_vec(TCGv_vec arg)
{
    tcg_temp_free_internal(tcgv_vec_temp(arg));
}

/*
 * Return the interned TEMP_CONST temp for (@type, @val), creating and
 * hashing it on first use.  The hash table keys point at the stored
 * value inside the temp itself, so the temp must never be freed.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_i32 tcg_constant_i32(int32_t val)
{
    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
}

TCGv_i64 tcg_constant_i64(int64_t val)
{
    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
}

TCGv_ptr tcg_constant_ptr_int(intptr_t val)
{
    return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
}

/* Vector constant with @val replicated across each element of size @vece. */
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

#ifdef CONFIG_DEBUG_TCG
/* Debug build: bounds-checked index of @ts within tcg_ctx->temps[]. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}

/* Debug build: validate and decode a TCGv_i32 handle (offset into temps). */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
#endif /* CONFIG_DEBUG_TCG */

/*
 * Return true if OP may appear in the opcode stream with TYPE.
 * Test the runtime variable that controls each opcode.
2205 */ 2206 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2207 { 2208 bool has_type; 2209 2210 switch (type) { 2211 case TCG_TYPE_I32: 2212 has_type = true; 2213 break; 2214 case TCG_TYPE_I64: 2215 has_type = TCG_TARGET_REG_BITS == 64; 2216 break; 2217 case TCG_TYPE_V64: 2218 has_type = TCG_TARGET_HAS_v64; 2219 break; 2220 case TCG_TYPE_V128: 2221 has_type = TCG_TARGET_HAS_v128; 2222 break; 2223 case TCG_TYPE_V256: 2224 has_type = TCG_TARGET_HAS_v256; 2225 break; 2226 default: 2227 has_type = false; 2228 break; 2229 } 2230 2231 switch (op) { 2232 case INDEX_op_discard: 2233 case INDEX_op_set_label: 2234 case INDEX_op_call: 2235 case INDEX_op_br: 2236 case INDEX_op_mb: 2237 case INDEX_op_insn_start: 2238 case INDEX_op_exit_tb: 2239 case INDEX_op_goto_tb: 2240 case INDEX_op_goto_ptr: 2241 case INDEX_op_qemu_ld_i32: 2242 case INDEX_op_qemu_st_i32: 2243 case INDEX_op_qemu_ld_i64: 2244 case INDEX_op_qemu_st_i64: 2245 return true; 2246 2247 case INDEX_op_qemu_st8_i32: 2248 return TCG_TARGET_HAS_qemu_st8_i32; 2249 2250 case INDEX_op_qemu_ld_i128: 2251 case INDEX_op_qemu_st_i128: 2252 return TCG_TARGET_HAS_qemu_ldst_i128; 2253 2254 case INDEX_op_add: 2255 case INDEX_op_and: 2256 case INDEX_op_mov: 2257 case INDEX_op_or: 2258 case INDEX_op_xor: 2259 return has_type; 2260 2261 case INDEX_op_setcond_i32: 2262 case INDEX_op_brcond_i32: 2263 case INDEX_op_movcond_i32: 2264 case INDEX_op_ld8u_i32: 2265 case INDEX_op_ld8s_i32: 2266 case INDEX_op_ld16u_i32: 2267 case INDEX_op_ld16s_i32: 2268 case INDEX_op_ld_i32: 2269 case INDEX_op_st8_i32: 2270 case INDEX_op_st16_i32: 2271 case INDEX_op_st_i32: 2272 case INDEX_op_extract_i32: 2273 case INDEX_op_sextract_i32: 2274 case INDEX_op_deposit_i32: 2275 return true; 2276 2277 case INDEX_op_negsetcond_i32: 2278 return TCG_TARGET_HAS_negsetcond_i32; 2279 case INDEX_op_extract2_i32: 2280 return TCG_TARGET_HAS_extract2_i32; 2281 case INDEX_op_add2_i32: 2282 return TCG_TARGET_HAS_add2_i32; 2283 case INDEX_op_sub2_i32: 2284 
return TCG_TARGET_HAS_sub2_i32; 2285 case INDEX_op_mulu2_i32: 2286 return TCG_TARGET_HAS_mulu2_i32; 2287 case INDEX_op_muls2_i32: 2288 return TCG_TARGET_HAS_muls2_i32; 2289 case INDEX_op_bswap16_i32: 2290 return TCG_TARGET_HAS_bswap16_i32; 2291 case INDEX_op_bswap32_i32: 2292 return TCG_TARGET_HAS_bswap32_i32; 2293 case INDEX_op_ctpop_i32: 2294 return TCG_TARGET_HAS_ctpop_i32; 2295 2296 case INDEX_op_brcond2_i32: 2297 case INDEX_op_setcond2_i32: 2298 return TCG_TARGET_REG_BITS == 32; 2299 2300 case INDEX_op_setcond_i64: 2301 case INDEX_op_brcond_i64: 2302 case INDEX_op_movcond_i64: 2303 case INDEX_op_ld8u_i64: 2304 case INDEX_op_ld8s_i64: 2305 case INDEX_op_ld16u_i64: 2306 case INDEX_op_ld16s_i64: 2307 case INDEX_op_ld32u_i64: 2308 case INDEX_op_ld32s_i64: 2309 case INDEX_op_ld_i64: 2310 case INDEX_op_st8_i64: 2311 case INDEX_op_st16_i64: 2312 case INDEX_op_st32_i64: 2313 case INDEX_op_st_i64: 2314 case INDEX_op_ext_i32_i64: 2315 case INDEX_op_extu_i32_i64: 2316 case INDEX_op_extract_i64: 2317 case INDEX_op_sextract_i64: 2318 case INDEX_op_deposit_i64: 2319 return TCG_TARGET_REG_BITS == 64; 2320 2321 case INDEX_op_negsetcond_i64: 2322 return TCG_TARGET_HAS_negsetcond_i64; 2323 case INDEX_op_extract2_i64: 2324 return TCG_TARGET_HAS_extract2_i64; 2325 case INDEX_op_extrl_i64_i32: 2326 case INDEX_op_extrh_i64_i32: 2327 return TCG_TARGET_HAS_extr_i64_i32; 2328 case INDEX_op_bswap16_i64: 2329 return TCG_TARGET_HAS_bswap16_i64; 2330 case INDEX_op_bswap32_i64: 2331 return TCG_TARGET_HAS_bswap32_i64; 2332 case INDEX_op_bswap64_i64: 2333 return TCG_TARGET_HAS_bswap64_i64; 2334 case INDEX_op_ctpop_i64: 2335 return TCG_TARGET_HAS_ctpop_i64; 2336 case INDEX_op_add2_i64: 2337 return TCG_TARGET_HAS_add2_i64; 2338 case INDEX_op_sub2_i64: 2339 return TCG_TARGET_HAS_sub2_i64; 2340 case INDEX_op_mulu2_i64: 2341 return TCG_TARGET_HAS_mulu2_i64; 2342 case INDEX_op_muls2_i64: 2343 return TCG_TARGET_HAS_muls2_i64; 2344 2345 case INDEX_op_mov_vec: 2346 case INDEX_op_dup_vec: 2347 case 
INDEX_op_dupm_vec: 2348 case INDEX_op_ld_vec: 2349 case INDEX_op_st_vec: 2350 case INDEX_op_add_vec: 2351 case INDEX_op_sub_vec: 2352 case INDEX_op_and_vec: 2353 case INDEX_op_or_vec: 2354 case INDEX_op_xor_vec: 2355 case INDEX_op_cmp_vec: 2356 return has_type; 2357 case INDEX_op_dup2_vec: 2358 return has_type && TCG_TARGET_REG_BITS == 32; 2359 case INDEX_op_not_vec: 2360 return has_type && TCG_TARGET_HAS_not_vec; 2361 case INDEX_op_neg_vec: 2362 return has_type && TCG_TARGET_HAS_neg_vec; 2363 case INDEX_op_abs_vec: 2364 return has_type && TCG_TARGET_HAS_abs_vec; 2365 case INDEX_op_andc_vec: 2366 return has_type && TCG_TARGET_HAS_andc_vec; 2367 case INDEX_op_orc_vec: 2368 return has_type && TCG_TARGET_HAS_orc_vec; 2369 case INDEX_op_nand_vec: 2370 return has_type && TCG_TARGET_HAS_nand_vec; 2371 case INDEX_op_nor_vec: 2372 return has_type && TCG_TARGET_HAS_nor_vec; 2373 case INDEX_op_eqv_vec: 2374 return has_type && TCG_TARGET_HAS_eqv_vec; 2375 case INDEX_op_mul_vec: 2376 return has_type && TCG_TARGET_HAS_mul_vec; 2377 case INDEX_op_shli_vec: 2378 case INDEX_op_shri_vec: 2379 case INDEX_op_sari_vec: 2380 return has_type && TCG_TARGET_HAS_shi_vec; 2381 case INDEX_op_shls_vec: 2382 case INDEX_op_shrs_vec: 2383 case INDEX_op_sars_vec: 2384 return has_type && TCG_TARGET_HAS_shs_vec; 2385 case INDEX_op_shlv_vec: 2386 case INDEX_op_shrv_vec: 2387 case INDEX_op_sarv_vec: 2388 return has_type && TCG_TARGET_HAS_shv_vec; 2389 case INDEX_op_rotli_vec: 2390 return has_type && TCG_TARGET_HAS_roti_vec; 2391 case INDEX_op_rotls_vec: 2392 return has_type && TCG_TARGET_HAS_rots_vec; 2393 case INDEX_op_rotlv_vec: 2394 case INDEX_op_rotrv_vec: 2395 return has_type && TCG_TARGET_HAS_rotv_vec; 2396 case INDEX_op_ssadd_vec: 2397 case INDEX_op_usadd_vec: 2398 case INDEX_op_sssub_vec: 2399 case INDEX_op_ussub_vec: 2400 return has_type && TCG_TARGET_HAS_sat_vec; 2401 case INDEX_op_smin_vec: 2402 case INDEX_op_umin_vec: 2403 case INDEX_op_smax_vec: 2404 case INDEX_op_umax_vec: 2405 return 
has_type && TCG_TARGET_HAS_minmax_vec; 2406 case INDEX_op_bitsel_vec: 2407 return has_type && TCG_TARGET_HAS_bitsel_vec; 2408 case INDEX_op_cmpsel_vec: 2409 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2410 2411 default: 2412 if (op < INDEX_op_last_generic) { 2413 const TCGOutOp *outop; 2414 TCGConstraintSetIndex con_set; 2415 2416 if (!has_type) { 2417 return false; 2418 } 2419 2420 outop = all_outop[op]; 2421 tcg_debug_assert(outop != NULL); 2422 2423 con_set = outop->static_constraint; 2424 if (con_set == C_Dynamic) { 2425 con_set = outop->dynamic_constraint(type, flags); 2426 } 2427 if (con_set >= 0) { 2428 return true; 2429 } 2430 tcg_debug_assert(con_set == C_NotImplemented); 2431 return false; 2432 } 2433 tcg_debug_assert(op < NB_OPS); 2434 return true; 2435 2436 case INDEX_op_last_generic: 2437 g_assert_not_reached(); 2438 } 2439 } 2440 2441 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2442 { 2443 unsigned width; 2444 2445 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2446 width = (type == TCG_TYPE_I32 ? 
             32 : 64);

    tcg_debug_assert(ofs < width);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= width - ofs);

    return TCG_TARGET_deposit_valid(type, ofs, len);
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

/*
 * Emit an INDEX_op_call invoking helper FUNC as described by INFO.
 * RET is the output temp (or NULL); ARGS are the input temps indexed
 * by the call layout in INFO.  Argument words are packed in order:
 * outputs, inputs, then the function pointer and info pointer.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    /* i64 temps created here to widen i32 args; freed after emission. */
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout for INFO exactly once. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 for the func and info words appended at the end. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word return: RET must be the first of n adjacent temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants a 64-bit slot: widen the i32 input first. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    /* Honor an active emit-before insertion point, if any. */
    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* The widening temps are dead once the call op has been emitted. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

/* Fixed-arity convenience wrappers around tcg_gen_callN. */

void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}

void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
{
    tcg_gen_callN(func, info, ret, &t1);
}

void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2)
{
    TCGTemp *args[2] = { t1, t2 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
{
    TCGTemp *args[3] = { t1, t2, t3 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
{
    TCGTemp *args[4] = { t1, t2, t3, t4 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
{
    TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
                   TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
{
    TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
                   TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
{
    TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
    tcg_gen_callN(func, info, ret, args);
}

/*
 * Reset the value-location state of every temp before register
 * allocation of a new TB, and clear the reg->temp reverse map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/*
 * Format a human-readable name for TS into BUF (for opcode dumps):
 * globals/fixed by name, locals as "locN", ebb temps as "tmpN",
 * constants as "$0x..." (with a "vN" width prefix for vectors).
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, but starting from a TCGArg. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Printable names for condition codes, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};

/* Printable names for the size/sign/endianness part of a MemOp. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB] = "ub",
    [MO_SB] = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};

/* Printable names for the alignment part of a MemOp. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Printable names for the atomicity part of a MemOp. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};

/* Printable names for bswap flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

#ifdef CONFIG_PLUGIN
/* Printable names for plugin callback insertion points. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif

/* True if D contains at most one register. */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

/* Lowest-numbered register in D. */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

/*
 * Dump the current opcode stream of S to F, one op per line.
 * If HAVE_PREFS, also print the output register preferences computed
 * by liveness; sync/dead annotations are printed whenever present.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Type-suffixed, vector-annotated, or bare opcode name. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant arg symbolically where possible;
               'i' counts how many constant args have been consumed.  */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Branch targets and memory barriers also decode symbolically. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant args print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before the liveness annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, " sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, " dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, " pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
{
    int n;

    arg_ct += k;
    n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
     */
    switch (arg_ct->pair) {
    case 1:
    case 3:
        return (k + 1) * 2;
    case 2:
        return (arg_ct->pair_index + 1) * 2 - 1;
    }

    /* Finally, sort by decreasing register count. */
    assert(n > 1);
    return -n;
}

/* sort from highest priority to lowest */
static void sort_constraints(TCGArgConstraint *a, int start, int n)
{
    int i, j;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    /* Simple O(n^2) selection over sort_index; n is tiny (op arity). */
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(a, a[start + i].sort_index);
            int p2 = get_constraint_priority(a, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

/* Parsed constraint sets: empty_cts for absent ops, all_cts indexed by set. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];

/*
 * Parse every constraint-set string (from tcg-target-con-*.h) into the
 * all_cts table: decode alias digits, '&' (newreg), 'p'/'m' (register
 * pairs), and per-target register/constant letters; then fix up pairs
 * that alias outputs, and sort each set by allocation priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output number *ct_str - '0'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}

/*
 * Return the parsed argument constraints for OP, consulting either the
 * outop table (converted opcodes) or the legacy tcg_target_op_def hook.
 */
static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
{
    TCGOpcode opc = op->opc;
    TCGType type = TCGOP_TYPE(op);
    unsigned flags = TCGOP_FLAGS(op);
    const TCGOpDef *def = &tcg_op_defs[opc];
    const TCGOutOp *outop = all_outop[opc];
    TCGConstraintSetIndex con_set;

    if (def->flags & TCG_OPF_NOT_PRESENT) {
        return empty_cts;
    }

    if (outop) {
        con_set = outop->static_constraint;
        if (con_set == C_Dynamic) {
            con_set = outop->dynamic_constraint(type, flags);
        }
    } else {
        con_set = tcg_target_op_def(opc, type, flags);
    }
tcg_debug_assert(con_set >= 0); 3380 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3381 3382 /* The constraint arguments must match TCGOpcode arguments. */ 3383 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3384 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3385 3386 return all_cts[con_set]; 3387 } 3388 3389 static void remove_label_use(TCGOp *op, int idx) 3390 { 3391 TCGLabel *label = arg_label(op->args[idx]); 3392 TCGLabelUse *use; 3393 3394 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3395 if (use->op == op) { 3396 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3397 return; 3398 } 3399 } 3400 g_assert_not_reached(); 3401 } 3402 3403 void tcg_op_remove(TCGContext *s, TCGOp *op) 3404 { 3405 switch (op->opc) { 3406 case INDEX_op_br: 3407 remove_label_use(op, 0); 3408 break; 3409 case INDEX_op_brcond_i32: 3410 case INDEX_op_brcond_i64: 3411 remove_label_use(op, 3); 3412 break; 3413 case INDEX_op_brcond2_i32: 3414 remove_label_use(op, 5); 3415 break; 3416 default: 3417 break; 3418 } 3419 3420 QTAILQ_REMOVE(&s->ops, op, link); 3421 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3422 s->nb_ops--; 3423 } 3424 3425 void tcg_remove_ops_after(TCGOp *op) 3426 { 3427 TCGContext *s = tcg_ctx; 3428 3429 while (true) { 3430 TCGOp *last = tcg_last_op(); 3431 if (last == op) { 3432 return; 3433 } 3434 tcg_op_remove(s, last); 3435 } 3436 } 3437 3438 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3439 { 3440 TCGContext *s = tcg_ctx; 3441 TCGOp *op = NULL; 3442 3443 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3444 QTAILQ_FOREACH(op, &s->free_ops, link) { 3445 if (nargs <= op->nargs) { 3446 QTAILQ_REMOVE(&s->free_ops, op, link); 3447 nargs = op->nargs; 3448 goto found; 3449 } 3450 } 3451 } 3452 3453 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
*/ 3454 nargs = MAX(4, nargs); 3455 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3456 3457 found: 3458 memset(op, 0, offsetof(TCGOp, link)); 3459 op->opc = opc; 3460 op->nargs = nargs; 3461 3462 /* Check for bitfield overflow. */ 3463 tcg_debug_assert(op->nargs == nargs); 3464 3465 s->nb_ops++; 3466 return op; 3467 } 3468 3469 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3470 { 3471 TCGOp *op = tcg_op_alloc(opc, nargs); 3472 3473 if (tcg_ctx->emit_before_op) { 3474 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3475 } else { 3476 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3477 } 3478 return op; 3479 } 3480 3481 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3482 TCGOpcode opc, TCGType type, unsigned nargs) 3483 { 3484 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3485 3486 TCGOP_TYPE(new_op) = type; 3487 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3488 return new_op; 3489 } 3490 3491 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3492 TCGOpcode opc, TCGType type, unsigned nargs) 3493 { 3494 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3495 3496 TCGOP_TYPE(new_op) = type; 3497 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3498 return new_op; 3499 } 3500 3501 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3502 { 3503 TCGLabelUse *u; 3504 3505 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3506 TCGOp *op = u->op; 3507 switch (op->opc) { 3508 case INDEX_op_br: 3509 op->args[0] = label_arg(to); 3510 break; 3511 case INDEX_op_brcond_i32: 3512 case INDEX_op_brcond_i64: 3513 op->args[3] = label_arg(to); 3514 break; 3515 case INDEX_op_brcond2_i32: 3516 op->args[5] = label_arg(to); 3517 break; 3518 default: 3519 g_assert_not_reached(); 3520 } 3521 } 3522 3523 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3524 } 3525 3526 /* Reachable analysis : remove unreachable code. 
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;    /* true while scanning ops that cannot execute */

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again. */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again. */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead. */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions. */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind. */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Liveness state bits for TCGTemp.state during the liveness passes. */
#define TS_DEAD 1    /* the temp's value is dead at this point */
#define TS_MEM 2     /* the temp's value is (also) in its memory slot */

#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp. */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals: dead, but their canonical value lives in memory. */
    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    /* Non-global temps: simply dead. */
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory.
 */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_TB:
            /* Values that outlive the BB must be in their memory slot. */
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            /* EBB-local temps and constants do not survive the BB. */
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}

/* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs. */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live across the branch: keep its prefs. */
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls.
 */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart. */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel meaning "used in more than one EBB". */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, addressed via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB. */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops backward, propagating liveness from uses to defs. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead. */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg(). */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper. */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs. */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part. The result can be optimized to a simple
               add or sub. This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live. */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2_i32:
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2_i32:
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead. */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live. */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed. */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead. */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update. */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode. */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes. */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type. */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand. */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints. That said, special case
                   moves to propagate preferences backward. */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart. */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries. */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global. */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead. */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require. */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals. */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory. */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points. */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM. */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded. */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available. */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified. */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov itself is dead: store the source
                           directly and drop the mov. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified. */
                arg_ts->state = 0;

                /* Sync outputs upon their last write. */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead. */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}

/* Allocate a stack-frame slot for @ts, honoring the alignment that its
   base type requires, and assign memory to all its parts. */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[].
 */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        /* Release the previous register mapping. */
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        /* Release the register the temp previously occupied. */
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        /* Fixed temps permanently occupy their register. */
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly. */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1]: all acceptable regs; reg_ct[0]: the preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  Same search order, but now evict the
       temp currently occupying the chosen register. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    g_assert_not_reached();
}

/* Allocate an aligned pair of consecutive registers (reg, reg + 1)
   satisfying the same constraints as tcg_reg_alloc. */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}

/* Make sure the temporary is in a register. If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register copy is not backed by the canonical slot. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* A global may legitimately live in a register only if that
           register copy is coherent with memory, or the temp is fixed. */
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
4776 */ 4777 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4778 { 4779 sync_globals(s, allocated_regs); 4780 4781 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4782 TCGTemp *ts = &s->temps[i]; 4783 /* 4784 * The liveness analysis already ensures that temps are dead. 4785 * Keep tcg_debug_asserts for safety. 4786 */ 4787 switch (ts->kind) { 4788 case TEMP_TB: 4789 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4790 break; 4791 case TEMP_EBB: 4792 case TEMP_CONST: 4793 break; 4794 default: 4795 g_assert_not_reached(); 4796 } 4797 } 4798 } 4799 4800 /* 4801 * Specialized code generation for INDEX_op_mov_* with a constant. 4802 */ 4803 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4804 tcg_target_ulong val, TCGLifeData arg_life, 4805 TCGRegSet preferred_regs) 4806 { 4807 /* ENV should not be modified. */ 4808 tcg_debug_assert(!temp_readonly(ots)); 4809 4810 /* The movi is not explicitly generated here. */ 4811 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4812 ots->val = val; 4813 ots->mem_coherent = 0; 4814 if (NEED_SYNC_ARG(0)) { 4815 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4816 } else if (IS_DEAD_ARG(0)) { 4817 temp_dead(s, ots); 4818 } 4819 } 4820 4821 /* 4822 * Specialized code generation for INDEX_op_mov_*. 4823 */ 4824 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4825 { 4826 const TCGLifeData arg_life = op->life; 4827 TCGRegSet allocated_regs, preferred_regs; 4828 TCGTemp *ts, *ots; 4829 TCGType otype, itype; 4830 TCGReg oreg, ireg; 4831 4832 allocated_regs = s->reserved_regs; 4833 preferred_regs = output_pref(op, 0); 4834 ots = arg_temp(op->args[0]); 4835 ts = arg_temp(op->args[1]); 4836 4837 /* ENV should not be modified. */ 4838 tcg_debug_assert(!temp_readonly(ots)); 4839 4840 /* Note that otype != itype for no-op truncation. 
*/ 4841 otype = ots->type; 4842 itype = ts->type; 4843 4844 if (ts->val_type == TEMP_VAL_CONST) { 4845 /* propagate constant or generate sti */ 4846 tcg_target_ulong val = ts->val; 4847 if (IS_DEAD_ARG(1)) { 4848 temp_dead(s, ts); 4849 } 4850 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4851 return; 4852 } 4853 4854 /* If the source value is in memory we're going to be forced 4855 to have it in a register in order to perform the copy. Copy 4856 the SOURCE value into its own register first, that way we 4857 don't have to reload SOURCE the next time it is used. */ 4858 if (ts->val_type == TEMP_VAL_MEM) { 4859 temp_load(s, ts, tcg_target_available_regs[itype], 4860 allocated_regs, preferred_regs); 4861 } 4862 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4863 ireg = ts->reg; 4864 4865 if (IS_DEAD_ARG(0)) { 4866 /* mov to a non-saved dead register makes no sense (even with 4867 liveness analysis disabled). */ 4868 tcg_debug_assert(NEED_SYNC_ARG(0)); 4869 if (!ots->mem_allocated) { 4870 temp_allocate_frame(s, ots); 4871 } 4872 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4873 if (IS_DEAD_ARG(1)) { 4874 temp_dead(s, ts); 4875 } 4876 temp_dead(s, ots); 4877 return; 4878 } 4879 4880 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4881 /* 4882 * The mov can be suppressed. Kill input first, so that it 4883 * is unlinked from reg_to_temp, then set the output to the 4884 * reg that we saved from the input. 4885 */ 4886 temp_dead(s, ts); 4887 oreg = ireg; 4888 } else { 4889 if (ots->val_type == TEMP_VAL_REG) { 4890 oreg = ots->reg; 4891 } else { 4892 /* Make sure to not spill the input register during allocation. */ 4893 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4894 allocated_regs | ((TCGRegSet)1 << ireg), 4895 preferred_regs, ots->indirect_base); 4896 } 4897 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4898 /* 4899 * Cross register class move not supported. 
4900 * Store the source register into the destination slot 4901 * and leave the destination temp as TEMP_VAL_MEM. 4902 */ 4903 assert(!temp_readonly(ots)); 4904 if (!ts->mem_allocated) { 4905 temp_allocate_frame(s, ots); 4906 } 4907 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4908 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4909 ots->mem_coherent = 1; 4910 return; 4911 } 4912 } 4913 set_temp_val_reg(s, ots, oreg); 4914 ots->mem_coherent = 0; 4915 4916 if (NEED_SYNC_ARG(0)) { 4917 temp_sync(s, ots, allocated_regs, 0, 0); 4918 } 4919 } 4920 4921 /* 4922 * Specialized code generation for INDEX_op_dup_vec. 4923 */ 4924 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4925 { 4926 const TCGLifeData arg_life = op->life; 4927 TCGRegSet dup_out_regs, dup_in_regs; 4928 const TCGArgConstraint *dup_args_ct; 4929 TCGTemp *its, *ots; 4930 TCGType itype, vtype; 4931 unsigned vece; 4932 int lowpart_ofs; 4933 bool ok; 4934 4935 ots = arg_temp(op->args[0]); 4936 its = arg_temp(op->args[1]); 4937 4938 /* ENV should not be modified. */ 4939 tcg_debug_assert(!temp_readonly(ots)); 4940 4941 itype = its->type; 4942 vece = TCGOP_VECE(op); 4943 vtype = TCGOP_TYPE(op); 4944 4945 if (its->val_type == TEMP_VAL_CONST) { 4946 /* Propagate constant via movi -> dupi. */ 4947 tcg_target_ulong val = its->val; 4948 if (IS_DEAD_ARG(1)) { 4949 temp_dead(s, its); 4950 } 4951 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4952 return; 4953 } 4954 4955 dup_args_ct = opcode_args_ct(op); 4956 dup_out_regs = dup_args_ct[0].regs; 4957 dup_in_regs = dup_args_ct[1].regs; 4958 4959 /* Allocate the output register now. */ 4960 if (ots->val_type != TEMP_VAL_REG) { 4961 TCGRegSet allocated_regs = s->reserved_regs; 4962 TCGReg oreg; 4963 4964 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4965 /* Make sure to not spill the input register. 
*/ 4966 tcg_regset_set_reg(allocated_regs, its->reg); 4967 } 4968 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4969 output_pref(op, 0), ots->indirect_base); 4970 set_temp_val_reg(s, ots, oreg); 4971 } 4972 4973 switch (its->val_type) { 4974 case TEMP_VAL_REG: 4975 /* 4976 * The dup constriaints must be broad, covering all possible VECE. 4977 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 4978 * to fail, indicating that extra moves are required for that case. 4979 */ 4980 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4981 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4982 goto done; 4983 } 4984 /* Try again from memory or a vector input register. */ 4985 } 4986 if (!its->mem_coherent) { 4987 /* 4988 * The input register is not synced, and so an extra store 4989 * would be required to use memory. Attempt an integer-vector 4990 * register move first. We do not have a TCGRegSet for this. 4991 */ 4992 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4993 break; 4994 } 4995 /* Sync the temp back to its slot and load from there. */ 4996 temp_sync(s, its, s->reserved_regs, 0, 0); 4997 } 4998 /* fall through */ 4999 5000 case TEMP_VAL_MEM: 5001 lowpart_ofs = 0; 5002 if (HOST_BIG_ENDIAN) { 5003 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5004 } 5005 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5006 its->mem_offset + lowpart_ofs)) { 5007 goto done; 5008 } 5009 /* Load the input into the destination vector register. */ 5010 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5011 break; 5012 5013 default: 5014 g_assert_not_reached(); 5015 } 5016 5017 /* We now have a vector input register, so dup must succeed. 
*/ 5018 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5019 tcg_debug_assert(ok); 5020 5021 done: 5022 ots->mem_coherent = 0; 5023 if (IS_DEAD_ARG(1)) { 5024 temp_dead(s, its); 5025 } 5026 if (NEED_SYNC_ARG(0)) { 5027 temp_sync(s, ots, s->reserved_regs, 0, 0); 5028 } 5029 if (IS_DEAD_ARG(0)) { 5030 temp_dead(s, ots); 5031 } 5032 } 5033 5034 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5035 { 5036 const TCGLifeData arg_life = op->life; 5037 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5038 TCGRegSet i_allocated_regs; 5039 TCGRegSet o_allocated_regs; 5040 int i, k, nb_iargs, nb_oargs; 5041 TCGReg reg; 5042 TCGArg arg; 5043 const TCGArgConstraint *args_ct; 5044 const TCGArgConstraint *arg_ct; 5045 TCGTemp *ts; 5046 TCGArg new_args[TCG_MAX_OP_ARGS]; 5047 int const_args[TCG_MAX_OP_ARGS]; 5048 TCGCond op_cond; 5049 5050 nb_oargs = def->nb_oargs; 5051 nb_iargs = def->nb_iargs; 5052 5053 /* copy constants */ 5054 memcpy(new_args + nb_oargs + nb_iargs, 5055 op->args + nb_oargs + nb_iargs, 5056 sizeof(TCGArg) * def->nb_cargs); 5057 5058 i_allocated_regs = s->reserved_regs; 5059 o_allocated_regs = s->reserved_regs; 5060 5061 switch (op->opc) { 5062 case INDEX_op_brcond_i32: 5063 case INDEX_op_brcond_i64: 5064 op_cond = op->args[2]; 5065 break; 5066 case INDEX_op_setcond_i32: 5067 case INDEX_op_setcond_i64: 5068 case INDEX_op_negsetcond_i32: 5069 case INDEX_op_negsetcond_i64: 5070 case INDEX_op_cmp_vec: 5071 op_cond = op->args[3]; 5072 break; 5073 case INDEX_op_brcond2_i32: 5074 op_cond = op->args[4]; 5075 break; 5076 case INDEX_op_movcond_i32: 5077 case INDEX_op_movcond_i64: 5078 case INDEX_op_setcond2_i32: 5079 case INDEX_op_cmpsel_vec: 5080 op_cond = op->args[5]; 5081 break; 5082 default: 5083 /* No condition within opcode. 
*/ 5084 op_cond = TCG_COND_ALWAYS; 5085 break; 5086 } 5087 5088 args_ct = opcode_args_ct(op); 5089 5090 /* satisfy input constraints */ 5091 for (k = 0; k < nb_iargs; k++) { 5092 TCGRegSet i_preferred_regs, i_required_regs; 5093 bool allocate_new_reg, copyto_new_reg; 5094 TCGTemp *ts2; 5095 int i1, i2; 5096 5097 i = args_ct[nb_oargs + k].sort_index; 5098 arg = op->args[i]; 5099 arg_ct = &args_ct[i]; 5100 ts = arg_temp(arg); 5101 5102 if (ts->val_type == TEMP_VAL_CONST) { 5103 #ifdef TCG_REG_ZERO 5104 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5105 /* Hardware zero register: indicate register via non-const. */ 5106 const_args[i] = 0; 5107 new_args[i] = TCG_REG_ZERO; 5108 continue; 5109 } 5110 #endif 5111 5112 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5113 op_cond, TCGOP_VECE(op))) { 5114 /* constant is OK for instruction */ 5115 const_args[i] = 1; 5116 new_args[i] = ts->val; 5117 continue; 5118 } 5119 } 5120 5121 reg = ts->reg; 5122 i_preferred_regs = 0; 5123 i_required_regs = arg_ct->regs; 5124 allocate_new_reg = false; 5125 copyto_new_reg = false; 5126 5127 switch (arg_ct->pair) { 5128 case 0: /* not paired */ 5129 if (arg_ct->ialias) { 5130 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5131 5132 /* 5133 * If the input is readonly, then it cannot also be an 5134 * output and aliased to itself. If the input is not 5135 * dead after the instruction, we must allocate a new 5136 * register and move it. 5137 */ 5138 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5139 || args_ct[arg_ct->alias_index].newreg) { 5140 allocate_new_reg = true; 5141 } else if (ts->val_type == TEMP_VAL_REG) { 5142 /* 5143 * Check if the current register has already been 5144 * allocated for another input. 
5145 */ 5146 allocate_new_reg = 5147 tcg_regset_test_reg(i_allocated_regs, reg); 5148 } 5149 } 5150 if (!allocate_new_reg) { 5151 temp_load(s, ts, i_required_regs, i_allocated_regs, 5152 i_preferred_regs); 5153 reg = ts->reg; 5154 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5155 } 5156 if (allocate_new_reg) { 5157 /* 5158 * Allocate a new register matching the constraint 5159 * and move the temporary register into it. 5160 */ 5161 temp_load(s, ts, tcg_target_available_regs[ts->type], 5162 i_allocated_regs, 0); 5163 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5164 i_preferred_regs, ts->indirect_base); 5165 copyto_new_reg = true; 5166 } 5167 break; 5168 5169 case 1: 5170 /* First of an input pair; if i1 == i2, the second is an output. */ 5171 i1 = i; 5172 i2 = arg_ct->pair_index; 5173 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5174 5175 /* 5176 * It is easier to default to allocating a new pair 5177 * and to identify a few cases where it's not required. 5178 */ 5179 if (arg_ct->ialias) { 5180 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5181 if (IS_DEAD_ARG(i1) && 5182 IS_DEAD_ARG(i2) && 5183 !temp_readonly(ts) && 5184 ts->val_type == TEMP_VAL_REG && 5185 ts->reg < TCG_TARGET_NB_REGS - 1 && 5186 tcg_regset_test_reg(i_required_regs, reg) && 5187 !tcg_regset_test_reg(i_allocated_regs, reg) && 5188 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5189 (ts2 5190 ? ts2->val_type == TEMP_VAL_REG && 5191 ts2->reg == reg + 1 && 5192 !temp_readonly(ts2) 5193 : s->reg_to_temp[reg + 1] == NULL)) { 5194 break; 5195 } 5196 } else { 5197 /* Without aliasing, the pair must also be an input. 
*/ 5198 tcg_debug_assert(ts2); 5199 if (ts->val_type == TEMP_VAL_REG && 5200 ts2->val_type == TEMP_VAL_REG && 5201 ts2->reg == reg + 1 && 5202 tcg_regset_test_reg(i_required_regs, reg)) { 5203 break; 5204 } 5205 } 5206 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5207 0, ts->indirect_base); 5208 goto do_pair; 5209 5210 case 2: /* pair second */ 5211 reg = new_args[arg_ct->pair_index] + 1; 5212 goto do_pair; 5213 5214 case 3: /* ialias with second output, no first input */ 5215 tcg_debug_assert(arg_ct->ialias); 5216 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5217 5218 if (IS_DEAD_ARG(i) && 5219 !temp_readonly(ts) && 5220 ts->val_type == TEMP_VAL_REG && 5221 reg > 0 && 5222 s->reg_to_temp[reg - 1] == NULL && 5223 tcg_regset_test_reg(i_required_regs, reg) && 5224 !tcg_regset_test_reg(i_allocated_regs, reg) && 5225 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5226 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5227 break; 5228 } 5229 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5230 i_allocated_regs, 0, 5231 ts->indirect_base); 5232 tcg_regset_set_reg(i_allocated_regs, reg); 5233 reg += 1; 5234 goto do_pair; 5235 5236 do_pair: 5237 /* 5238 * If an aliased input is not dead after the instruction, 5239 * we must allocate a new register and move it. 5240 */ 5241 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5242 TCGRegSet t_allocated_regs = i_allocated_regs; 5243 5244 /* 5245 * Because of the alias, and the continued life, make sure 5246 * that the temp is somewhere *other* than the reg pair, 5247 * and we get a copy in reg. 5248 */ 5249 tcg_regset_set_reg(t_allocated_regs, reg); 5250 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5251 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5252 /* If ts was already in reg, copy it somewhere else. 
*/ 5253 TCGReg nr; 5254 bool ok; 5255 5256 tcg_debug_assert(ts->kind != TEMP_FIXED); 5257 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5258 t_allocated_regs, 0, ts->indirect_base); 5259 ok = tcg_out_mov(s, ts->type, nr, reg); 5260 tcg_debug_assert(ok); 5261 5262 set_temp_val_reg(s, ts, nr); 5263 } else { 5264 temp_load(s, ts, tcg_target_available_regs[ts->type], 5265 t_allocated_regs, 0); 5266 copyto_new_reg = true; 5267 } 5268 } else { 5269 /* Preferably allocate to reg, otherwise copy. */ 5270 i_required_regs = (TCGRegSet)1 << reg; 5271 temp_load(s, ts, i_required_regs, i_allocated_regs, 5272 i_preferred_regs); 5273 copyto_new_reg = ts->reg != reg; 5274 } 5275 break; 5276 5277 default: 5278 g_assert_not_reached(); 5279 } 5280 5281 if (copyto_new_reg) { 5282 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5283 /* 5284 * Cross register class move not supported. Sync the 5285 * temp back to its slot and load from there. 5286 */ 5287 temp_sync(s, ts, i_allocated_regs, 0, 0); 5288 tcg_out_ld(s, ts->type, reg, 5289 ts->mem_base->reg, ts->mem_offset); 5290 } 5291 } 5292 new_args[i] = reg; 5293 const_args[i] = 0; 5294 tcg_regset_set_reg(i_allocated_regs, reg); 5295 } 5296 5297 /* mark dead temporaries and free the associated registers */ 5298 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5299 if (IS_DEAD_ARG(i)) { 5300 temp_dead(s, arg_temp(op->args[i])); 5301 } 5302 } 5303 5304 if (def->flags & TCG_OPF_COND_BRANCH) { 5305 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5306 } else if (def->flags & TCG_OPF_BB_END) { 5307 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5308 } else { 5309 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5310 /* XXX: permit generic clobber register list ? 
*/ 5311 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5312 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5313 tcg_reg_free(s, i, i_allocated_regs); 5314 } 5315 } 5316 } 5317 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5318 /* sync globals if the op has side effects and might trigger 5319 an exception. */ 5320 sync_globals(s, i_allocated_regs); 5321 } 5322 5323 /* satisfy the output constraints */ 5324 for (k = 0; k < nb_oargs; k++) { 5325 i = args_ct[k].sort_index; 5326 arg = op->args[i]; 5327 arg_ct = &args_ct[i]; 5328 ts = arg_temp(arg); 5329 5330 /* ENV should not be modified. */ 5331 tcg_debug_assert(!temp_readonly(ts)); 5332 5333 switch (arg_ct->pair) { 5334 case 0: /* not paired */ 5335 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5336 reg = new_args[arg_ct->alias_index]; 5337 } else if (arg_ct->newreg) { 5338 reg = tcg_reg_alloc(s, arg_ct->regs, 5339 i_allocated_regs | o_allocated_regs, 5340 output_pref(op, k), ts->indirect_base); 5341 } else { 5342 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5343 output_pref(op, k), ts->indirect_base); 5344 } 5345 break; 5346 5347 case 1: /* first of pair */ 5348 if (arg_ct->oalias) { 5349 reg = new_args[arg_ct->alias_index]; 5350 } else if (arg_ct->newreg) { 5351 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5352 i_allocated_regs | o_allocated_regs, 5353 output_pref(op, k), 5354 ts->indirect_base); 5355 } else { 5356 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5357 output_pref(op, k), 5358 ts->indirect_base); 5359 } 5360 break; 5361 5362 case 2: /* second of pair */ 5363 if (arg_ct->oalias) { 5364 reg = new_args[arg_ct->alias_index]; 5365 } else { 5366 reg = new_args[arg_ct->pair_index] + 1; 5367 } 5368 break; 5369 5370 case 3: /* first of pair, aliasing with a second input */ 5371 tcg_debug_assert(!arg_ct->newreg); 5372 reg = new_args[arg_ct->pair_index] - 1; 5373 break; 5374 5375 default: 5376 g_assert_not_reached(); 5377 } 5378 tcg_regset_set_reg(o_allocated_regs, reg); 5379 
set_temp_val_reg(s, ts, reg); 5380 ts->mem_coherent = 0; 5381 new_args[i] = reg; 5382 } 5383 } 5384 5385 /* emit instruction */ 5386 TCGType type = TCGOP_TYPE(op); 5387 switch (op->opc) { 5388 case INDEX_op_ext_i32_i64: 5389 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5390 break; 5391 case INDEX_op_extu_i32_i64: 5392 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5393 break; 5394 case INDEX_op_extrl_i64_i32: 5395 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5396 break; 5397 5398 case INDEX_op_add: 5399 case INDEX_op_and: 5400 case INDEX_op_andc: 5401 case INDEX_op_clz: 5402 case INDEX_op_ctz: 5403 case INDEX_op_divs: 5404 case INDEX_op_divu: 5405 case INDEX_op_eqv: 5406 case INDEX_op_mul: 5407 case INDEX_op_mulsh: 5408 case INDEX_op_muluh: 5409 case INDEX_op_nand: 5410 case INDEX_op_nor: 5411 case INDEX_op_or: 5412 case INDEX_op_orc: 5413 case INDEX_op_rems: 5414 case INDEX_op_remu: 5415 case INDEX_op_rotl: 5416 case INDEX_op_rotr: 5417 case INDEX_op_sar: 5418 case INDEX_op_shl: 5419 case INDEX_op_shr: 5420 case INDEX_op_xor: 5421 { 5422 const TCGOutOpBinary *out = 5423 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5424 5425 /* Constants should never appear in the first source operand. */ 5426 tcg_debug_assert(!const_args[1]); 5427 if (const_args[2]) { 5428 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5429 } else { 5430 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5431 } 5432 } 5433 break; 5434 5435 case INDEX_op_sub: 5436 { 5437 const TCGOutOpSubtract *out = &outop_sub; 5438 5439 /* 5440 * Constants should never appear in the second source operand. 5441 * These are folded to add with negative constant. 
             */
            tcg_debug_assert(!const_args[2]);
            if (const_args[1]) {
                /* First source is constant: use the reg/imm/reg emitter. */
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_neg:
    case INDEX_op_not:
        {
            const TCGOutOpUnary *out =
                container_of(all_outop[op->opc], TCGOutOpUnary, base);

            /* Constants should have been folded. */
            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1]);
        }
        break;

    case INDEX_op_divs2:
    case INDEX_op_divu2:
        {
            const TCGOutOpDivRem *out =
                container_of(all_outop[op->opc], TCGOutOpDivRem, base);

            /* Only used by x86 and s390x, which use matching constraints. */
            tcg_debug_assert(new_args[0] == new_args[2]);
            tcg_debug_assert(new_args[1] == new_args[3]);
            tcg_debug_assert(!const_args[4]);
            out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
        }
        break;

    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
                           TCGOP_VECE(op), new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, type, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

/*
 * Register allocation for dup2_vec: build one 64-bit vector element
 * from two 32-bit inputs.  Only valid on 32-bit hosts (asserted below).
 * Returns true if the operation was emitted here, false to request the
 * generic dup2 expansion from the caller.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low 32-bit input */
    itsh = arg_temp(op->args[2]);   /* high 32-bit input */

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the smallest element size whose replication yields val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points at the subindex-0 half of the 64-bit temp. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

/*
 * Load the call argument temp @ts into the fixed argument register @reg,
 * freeing whatever currently occupies @reg (registers in @allocated_regs
 * are not spilled into).
 */
static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        /* Not in a register: load directly into @reg and nothing else. */
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

/*
 * Store the call argument temp @ts into the stack argument slot @arg_slot.
 */
static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               arg_slot_stk_ofs(arg_slot));
}

/*
 * Place the call argument temp @ts into its slot @l, dispatching to
 * register or stack placement.  On register placement, the chosen
 * register is added to *@allocated_regs so later arguments do not
 * spill it.
 */
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (arg_slot_reg_p(l->arg_slot)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
    }
}

/*
 * Pass the address @ref_base + @ref_off as the argument in slot @arg_slot,
 * either directly in the argument register or via a scratch register
 * stored to the stack slot.
 */
static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;

    if (arg_slot_reg_p(arg_slot)) {
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   arg_slot_stk_ofs(arg_slot));
    }
}

/*
 * Register allocation for a helper call: marshal the inputs into the
 * ABI argument locations, clobber the call-clobbered registers, sync
 * or save globals as required by the helper's flags, emit the call,
 * and bind the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value, then pass a pointer to it. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers. */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers. */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed. */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Spill the vector return value to the output home slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed.
5842 * Any host prepared for two operations can implement this with 5843 * half alignment. 5844 */ 5845 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5846 align = MAX(align, half); 5847 } 5848 break; 5849 5850 case MO_ATOM_SUBALIGN: 5851 atmax = size; 5852 if (host_atom != MO_ATOM_SUBALIGN) { 5853 /* If unaligned but not odd, there are subobjects up to half. */ 5854 if (allow_two_ops) { 5855 align = MAX(align, half); 5856 } else { 5857 align = MAX(align, size); 5858 } 5859 } 5860 break; 5861 5862 default: 5863 g_assert_not_reached(); 5864 } 5865 5866 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5867 } 5868 5869 /* 5870 * Similarly for qemu_ld/st slow path helpers. 5871 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5872 * using only the provided backend tcg_out_* functions. 5873 */ 5874 5875 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5876 { 5877 int ofs = arg_slot_stk_ofs(slot); 5878 5879 /* 5880 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5881 * require extension to uint64_t, adjust the address for uint32_t. 5882 */ 5883 if (HOST_BIG_ENDIAN && 5884 TCG_TARGET_REG_BITS == 64 && 5885 type == TCG_TYPE_I32) { 5886 ofs += 4; 5887 } 5888 return ofs; 5889 } 5890 5891 static void tcg_out_helper_load_slots(TCGContext *s, 5892 unsigned nmov, TCGMovExtend *mov, 5893 const TCGLdstHelperParam *parm) 5894 { 5895 unsigned i; 5896 TCGReg dst3; 5897 5898 /* 5899 * Start from the end, storing to the stack first. 5900 * This frees those registers, so we need not consider overlap. 5901 */ 5902 for (i = nmov; i-- > 0; ) { 5903 unsigned slot = mov[i].dst; 5904 5905 if (arg_slot_reg_p(slot)) { 5906 goto found_reg; 5907 } 5908 5909 TCGReg src = mov[i].src; 5910 TCGType dst_type = mov[i].dst_type; 5911 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5912 5913 /* The argument is going onto the stack; extend into scratch. 
*/ 5914 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5915 tcg_debug_assert(parm->ntmp != 0); 5916 mov[i].dst = src = parm->tmp[0]; 5917 tcg_out_movext1(s, &mov[i]); 5918 } 5919 5920 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5921 tcg_out_helper_stk_ofs(dst_type, slot)); 5922 } 5923 return; 5924 5925 found_reg: 5926 /* 5927 * The remaining arguments are in registers. 5928 * Convert slot numbers to argument registers. 5929 */ 5930 nmov = i + 1; 5931 for (i = 0; i < nmov; ++i) { 5932 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5933 } 5934 5935 switch (nmov) { 5936 case 4: 5937 /* The backend must have provided enough temps for the worst case. */ 5938 tcg_debug_assert(parm->ntmp >= 2); 5939 5940 dst3 = mov[3].dst; 5941 for (unsigned j = 0; j < 3; ++j) { 5942 if (dst3 == mov[j].src) { 5943 /* 5944 * Conflict. Copy the source to a temporary, perform the 5945 * remaining moves, then the extension from our scratch 5946 * on the way out. 5947 */ 5948 TCGReg scratch = parm->tmp[1]; 5949 5950 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5951 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5952 tcg_out_movext1_new_src(s, &mov[3], scratch); 5953 break; 5954 } 5955 } 5956 5957 /* No conflicts: perform this move and continue. */ 5958 tcg_out_movext1(s, &mov[3]); 5959 /* fall through */ 5960 5961 case 3: 5962 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5963 parm->ntmp ? parm->tmp[0] : -1); 5964 break; 5965 case 2: 5966 tcg_out_movext2(s, mov, mov + 1, 5967 parm->ntmp ? 
parm->tmp[0] : -1); 5968 break; 5969 case 1: 5970 tcg_out_movext1(s, mov); 5971 break; 5972 default: 5973 g_assert_not_reached(); 5974 } 5975 } 5976 5977 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5978 TCGType type, tcg_target_long imm, 5979 const TCGLdstHelperParam *parm) 5980 { 5981 if (arg_slot_reg_p(slot)) { 5982 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5983 } else { 5984 int ofs = tcg_out_helper_stk_ofs(type, slot); 5985 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5986 tcg_debug_assert(parm->ntmp != 0); 5987 tcg_out_movi(s, type, parm->tmp[0], imm); 5988 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5989 } 5990 } 5991 } 5992 5993 static void tcg_out_helper_load_common_args(TCGContext *s, 5994 const TCGLabelQemuLdst *ldst, 5995 const TCGLdstHelperParam *parm, 5996 const TCGHelperInfo *info, 5997 unsigned next_arg) 5998 { 5999 TCGMovExtend ptr_mov = { 6000 .dst_type = TCG_TYPE_PTR, 6001 .src_type = TCG_TYPE_PTR, 6002 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6003 }; 6004 const TCGCallArgumentLoc *loc = &info->in[0]; 6005 TCGType type; 6006 unsigned slot; 6007 tcg_target_ulong imm; 6008 6009 /* 6010 * Handle env, which is always first. 6011 */ 6012 ptr_mov.dst = loc->arg_slot; 6013 ptr_mov.src = TCG_AREG0; 6014 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6015 6016 /* 6017 * Handle oi. 6018 */ 6019 imm = ldst->oi; 6020 loc = &info->in[next_arg]; 6021 type = TCG_TYPE_I32; 6022 switch (loc->kind) { 6023 case TCG_CALL_ARG_NORMAL: 6024 break; 6025 case TCG_CALL_ARG_EXTEND_U: 6026 case TCG_CALL_ARG_EXTEND_S: 6027 /* No extension required for MemOpIdx. */ 6028 tcg_debug_assert(imm <= INT32_MAX); 6029 type = TCG_TYPE_REG; 6030 break; 6031 default: 6032 g_assert_not_reached(); 6033 } 6034 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6035 next_arg++; 6036 6037 /* 6038 * Handle ra. 
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* Backend can materialize the return address itself. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Pass the raddr recorded at translation time as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}

/*
 * Append to @mov the one or two moves required to place the value in
 * registers @lo (and @hi for a double-register value) into the call
 * argument slots @loc.  Returns the number of moves added (1 or 2).
 */
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        /* Value fits one host register: a single move, maybe extended. */
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    /* Value occupies two host registers. */
    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    /* Place lo/hi in host-endian order across the two slots. */
    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}

/*
 * Load the arguments for a qemu_ld slow-path helper call:
 * select the helper by access size, pass the guest address, prepare
 * the by-reference return area if needed, then the common env/oi/ra.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

/*
 * Move the return value of a qemu_ld slow-path helper into the data
 * register(s) recorded in @ldst, applying sign/zero extension when the
 * helper was not asked to perform it (@load_sign).
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        if (TCG_TARGET_REG_BITS == 32) {
            /* Two-register result: handled below the switch. */
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return, then reload as two I64 halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register return: move both halves as a parallel move. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}

/*
 * Load the arguments for a qemu_st slow-path helper call:
 * select the helper by access size, pass the guest address and the
 * data value (by value or by reference for I128), then the common
 * env/oi/ra arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument.
     */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* Store the I128 value to its reference slots, pass a pointer. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

/*
 * Generate host code for @tb from the current op stream in @s.
 * Returns the size of the generated code in bytes, or a negative value
 * on buffer overflow (the TB must be restarted).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.
             */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* One entry of start_words words per guest insn in the TB. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.
                 */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.
         */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Record the code end offset of the final guest insn. */
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        /* Propagate backend failure (e.g. branch out of range). */
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB places a breakpoint in this function; must not be inlined away. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

/*
 * Return the offset of @str within the string table @strtab.
 * The string must be present: there is no terminating condition,
 * so a missing string loops forever.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

/*
 * Build an in-memory ELF image describing the buffer @buf_ptr/@buf_size
 * with the given .debug_frame contents, and register it with GDB via
 * the JIT interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Minimal .debug_info contents: one CU containing one function. */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): ehsize uses sizeof(Shdr); sizeof(Ehdr) would
               be expected here — confirm consumers do not care. */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.
             */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0"
".strtab\0" "code_gen_buffer", 6774 }; 6775 6776 /* We only need a single jit entry; statically allocate it. */ 6777 static struct jit_code_entry one_entry; 6778 6779 uintptr_t buf = (uintptr_t)buf_ptr; 6780 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6781 DebugFrameHeader *dfh; 6782 6783 img = g_malloc(img_size); 6784 *img = img_template; 6785 6786 img->phdr.p_vaddr = buf; 6787 img->phdr.p_paddr = buf; 6788 img->phdr.p_memsz = buf_size; 6789 6790 img->shdr[1].sh_name = find_string(img->str, ".text"); 6791 img->shdr[1].sh_addr = buf; 6792 img->shdr[1].sh_size = buf_size; 6793 6794 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6795 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6796 6797 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6798 img->shdr[4].sh_size = debug_frame_size; 6799 6800 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6801 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6802 6803 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6804 img->sym[1].st_value = buf; 6805 img->sym[1].st_size = buf_size; 6806 6807 img->di.cu_low_pc = buf; 6808 img->di.cu_high_pc = buf + buf_size; 6809 img->di.fn_low_pc = buf; 6810 img->di.fn_high_pc = buf + buf_size; 6811 6812 dfh = (DebugFrameHeader *)(img + 1); 6813 memcpy(dfh, debug_frame, debug_frame_size); 6814 dfh->fde.func_start = buf; 6815 dfh->fde.func_len = buf_size; 6816 6817 #ifdef DEBUG_JIT 6818 /* Enable this block to be able to debug the ELF image file creation. 6819 One can use readelf, objdump, or other inspection utilities. */ 6820 { 6821 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6822 FILE *f = fopen(jit, "w+b"); 6823 if (f) { 6824 if (fwrite(img, img_size, 1, f) != img_size) { 6825 /* Avoid stupid unused return value warning for fwrite. 
*/ 6826 } 6827 fclose(f); 6828 } 6829 } 6830 #endif 6831 6832 one_entry.symfile_addr = img; 6833 one_entry.symfile_size = img_size; 6834 6835 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6836 __jit_debug_descriptor.relevant_entry = &one_entry; 6837 __jit_debug_descriptor.first_entry = &one_entry; 6838 __jit_debug_register_code(); 6839 } 6840 #else 6841 /* No support for the feature. Provide the entry point expected by exec.c, 6842 and implement the internal function we declared earlier. */ 6843 6844 static void tcg_register_jit_int(const void *buf, size_t size, 6845 const void *debug_frame, 6846 size_t debug_frame_size) 6847 { 6848 } 6849 6850 void tcg_register_jit(const void *buf, size_t buf_size) 6851 { 6852 } 6853 #endif /* ELF_HOST_MACHINE */ 6854 6855 #if !TCG_TARGET_MAYBE_vec 6856 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6857 { 6858 g_assert_not_reached(); 6859 } 6860 #endif 6861