/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.
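 * They describe the DWARF .debug_frame CIE/FDE records that are passed to
 * tcg_register_jit_int as part of the in-memory ELF image used to describe
 * the generated code to GDB (see DEBUG_JIT above); each tcg-target supplies
 * the host-specific register and unwind contents.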
*/ 77 typedef struct { 78 uint32_t len __attribute__((aligned((sizeof(void *))))); 79 uint32_t id; 80 uint8_t version; 81 char augmentation[1]; 82 uint8_t code_align; 83 uint8_t data_align; 84 uint8_t return_column; 85 } DebugFrameCIE; 86 87 typedef struct QEMU_PACKED { 88 uint32_t len __attribute__((aligned((sizeof(void *))))); 89 uint32_t cie_offset; 90 uintptr_t func_start; 91 uintptr_t func_len; 92 } DebugFrameFDEHeader; 93 94 typedef struct QEMU_PACKED { 95 DebugFrameCIE cie; 96 DebugFrameFDEHeader fde; 97 } DebugFrameHeader; 98 99 struct TCGLabelQemuLdst { 100 bool is_ld; /* qemu_ld: true, qemu_st: false */ 101 MemOpIdx oi; 102 TCGType type; /* result type of a load */ 103 TCGReg addr_reg; /* reg index for guest virtual addr */ 104 TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ 105 TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ 106 const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ 107 tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ 108 QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; 109 }; 110 111 static void tcg_register_jit_int(const void *buf, size_t size, 112 const void *debug_frame, 113 size_t debug_frame_size) 114 __attribute__((unused)); 115 116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */ 117 static void tcg_out_tb_start(TCGContext *s); 118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 119 intptr_t arg2); 120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 121 static void tcg_out_movi(TCGContext *s, TCGType type, 122 TCGReg ret, tcg_target_long arg); 123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg); 126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg); 127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg); 128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg); 129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); 132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); 133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); 134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); 135 static void tcg_out_goto_tb(TCGContext *s, int which); 136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, 137 const TCGArg args[TCG_MAX_OP_ARGS], 138 const int const_args[TCG_MAX_OP_ARGS]); 139 #if TCG_TARGET_MAYBE_vec 140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 141 TCGReg dst, TCGReg src); 142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 143 TCGReg dst, TCGReg base, intptr_t offset); 144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 145 TCGReg dst, int64_t arg); 146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 147 unsigned vecl, unsigned vece, 148 const TCGArg args[TCG_MAX_OP_ARGS], 149 const int const_args[TCG_MAX_OP_ARGS]); 150 #else 151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 152 TCGReg dst, TCGReg src) 153 { 154 g_assert_not_reached(); 155 } 156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType 
type, unsigned vece, 157 TCGReg dst, TCGReg base, intptr_t offset) 158 { 159 g_assert_not_reached(); 160 } 161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 162 TCGReg dst, int64_t arg) 163 { 164 g_assert_not_reached(); 165 } 166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 167 unsigned vecl, unsigned vece, 168 const TCGArg args[TCG_MAX_OP_ARGS], 169 const int const_args[TCG_MAX_OP_ARGS]) 170 { 171 g_assert_not_reached(); 172 } 173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) 174 { 175 return 0; 176 } 177 #endif 178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 179 intptr_t arg2); 180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 181 TCGReg base, intptr_t ofs); 182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 183 const TCGHelperInfo *info); 184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); 185 static bool tcg_target_const_match(int64_t val, int ct, 186 TCGType type, TCGCond cond, int vece); 187 188 #ifndef CONFIG_USER_ONLY 189 #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) 190 #endif 191 192 typedef struct TCGLdstHelperParam { 193 TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg); 194 unsigned ntmp; 195 int tmp[3]; 196 } TCGLdstHelperParam; 197 198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 199 const TCGLdstHelperParam *p) 200 __attribute__((unused)); 201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l, 202 bool load_sign, const TCGLdstHelperParam *p) 203 __attribute__((unused)); 204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, 205 const TCGLdstHelperParam *p) 206 __attribute__((unused)); 207 208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = { 209 [MO_UB] = helper_ldub_mmu, 210 [MO_SB] = helper_ldsb_mmu, 211 [MO_UW] = helper_lduw_mmu, 212 [MO_SW] = helper_ldsw_mmu, 213 [MO_UL] = helper_ldul_mmu, 214 [MO_UQ] = helper_ldq_mmu, 215 #if TCG_TARGET_REG_BITS == 64 216 [MO_SL] = helper_ldsl_mmu, 217 [MO_128] = helper_ld16_mmu, 218 #endif 219 }; 220 221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = { 222 [MO_8] = helper_stb_mmu, 223 [MO_16] = helper_stw_mmu, 224 [MO_32] = helper_stl_mmu, 225 [MO_64] = helper_stq_mmu, 226 #if TCG_TARGET_REG_BITS == 64 227 [MO_128] = helper_st16_mmu, 228 #endif 229 }; 230 231 typedef struct { 232 MemOp atom; /* lg2 bits of atomicity required */ 233 MemOp align; /* lg2 bits of alignment to use */ 234 } TCGAtomAlign; 235 236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 237 MemOp host_atom, bool allow_two_ops) 238 __attribute__((unused)); 239 240 #ifdef CONFIG_USER_ONLY 241 bool tcg_use_softmmu; 242 #endif 243 244 TCGContext tcg_init_ctx; 245 __thread TCGContext *tcg_ctx; 246 247 TCGContext **tcg_ctxs; 248 unsigned int tcg_cur_ctxs; 249 unsigned int tcg_max_ctxs; 250 TCGv_env tcg_env; 251 const void *tcg_code_gen_epilogue; 252 uintptr_t tcg_splitwx_diff; 253 254 #ifndef CONFIG_TCG_INTERPRETER 255 tcg_prologue_fn *tcg_qemu_tb_exec; 256 #endif 257 258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 259 static TCGRegSet tcg_target_call_clobber_regs; 260 261 #if TCG_TARGET_INSN_UNIT_SIZE == 1 262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 263 { 264 *s->code_ptr++ = v; 265 } 266 267 static __attribute__((unused)) inline void 
tcg_patch8(tcg_insn_unit *p, 268 uint8_t v) 269 { 270 *p = v; 271 } 272 #endif 273 274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 276 { 277 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 278 *s->code_ptr++ = v; 279 } else { 280 tcg_insn_unit *p = s->code_ptr; 281 memcpy(p, &v, sizeof(v)); 282 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 283 } 284 } 285 286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 287 uint16_t v) 288 { 289 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 290 *p = v; 291 } else { 292 memcpy(p, &v, sizeof(v)); 293 } 294 } 295 #endif 296 297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 299 { 300 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 301 *s->code_ptr++ = v; 302 } else { 303 tcg_insn_unit *p = s->code_ptr; 304 memcpy(p, &v, sizeof(v)); 305 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 306 } 307 } 308 309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 310 uint32_t v) 311 { 312 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 313 *p = v; 314 } else { 315 memcpy(p, &v, sizeof(v)); 316 } 317 } 318 #endif 319 320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 322 { 323 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 324 *s->code_ptr++ = v; 325 } else { 326 tcg_insn_unit *p = s->code_ptr; 327 memcpy(p, &v, sizeof(v)); 328 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 329 } 330 } 331 332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 333 uint64_t v) 334 { 335 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 336 *p = v; 337 } else { 338 memcpy(p, &v, sizeof(v)); 339 } 340 } 341 #endif 342 343 /* label relocation processing */ 344 345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 346 TCGLabel *l, intptr_t addend) 347 { 348 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 349 350 r->type = type; 351 r->ptr = code_ptr; 352 r->addend = addend; 353 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 354 } 355 356 static void tcg_out_label(TCGContext *s, TCGLabel *l) 357 { 358 tcg_debug_assert(!l->has_value); 359 l->has_value = 1; 360 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 361 } 362 363 TCGLabel *gen_new_label(void) 364 { 365 TCGContext *s = tcg_ctx; 366 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 367 368 memset(l, 0, sizeof(TCGLabel)); 369 l->id = s->nb_labels++; 370 QSIMPLEQ_INIT(&l->branches); 371 QSIMPLEQ_INIT(&l->relocs); 372 373 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 374 375 return l; 376 } 377 378 static bool tcg_resolve_relocs(TCGContext *s) 379 { 380 TCGLabel *l; 381 382 QSIMPLEQ_FOREACH(l, &s->labels, next) { 383 TCGRelocation *r; 384 uintptr_t value = l->u.value; 385 386 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 387 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 388 return false; 389 } 390 } 391 } 392 return true; 393 } 394 395 static void set_jmp_reset_offset(TCGContext *s, int which) 396 { 397 /* 398 * We will check for overflow at the end of the opcode loop in 399 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 400 */ 401 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); 402 } 403 404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) 405 { 406 /* 407 * We will check for overflow at the end of the opcode loop in 408 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure.
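 * tcg_out_movext1 expands the move/extend described by a TCGMovExtend;
 * tcg_out_movext1_new_src does the same while overriding the source
 * register, e.g. after the value has been parked in a scratch register.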
*/ 516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, 517 TCGReg src) 518 { 519 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); 520 } 521 522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) 523 { 524 tcg_out_movext1_new_src(s, i, i->src); 525 } 526 527 /** 528 * tcg_out_movext2 -- move and extend two pair 529 * @s: tcg context 530 * @i1: first move description 531 * @i2: second move description 532 * @scratch: temporary register, or -1 for none 533 * 534 * As tcg_out_movext, for both @i1 and @i2, caring for overlap 535 * between the sources and destinations. 536 */ 537 538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, 539 const TCGMovExtend *i2, int scratch) 540 { 541 TCGReg src1 = i1->src; 542 TCGReg src2 = i2->src; 543 544 if (i1->dst != src2) { 545 tcg_out_movext1(s, i1); 546 tcg_out_movext1(s, i2); 547 return; 548 } 549 if (i2->dst == src1) { 550 TCGType src1_type = i1->src_type; 551 TCGType src2_type = i2->src_type; 552 553 if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { 554 /* The data is now in the correct registers, now extend. */ 555 src1 = i2->src; 556 src2 = i1->src; 557 } else { 558 tcg_debug_assert(scratch >= 0); 559 tcg_out_mov(s, src1_type, scratch, src1); 560 src1 = scratch; 561 } 562 } 563 tcg_out_movext1_new_src(s, i2, src2); 564 tcg_out_movext1_new_src(s, i1, src1); 565 } 566 567 /** 568 * tcg_out_movext3 -- move and extend three pair 569 * @s: tcg context 570 * @i1: first move description 571 * @i2: second move description 572 * @i3: third move description 573 * @scratch: temporary register, or -1 for none 574 * 575 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap 576 * between the sources and destinations. 577 */ 578 579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, 580 const TCGMovExtend *i2, const TCGMovExtend *i3, 581 int scratch) 582 { 583 TCGReg src1 = i1->src; 584 TCGReg src2 = i2->src; 585 TCGReg src3 = i3->src; 586 587 if (i1->dst != src2 && i1->dst != src3) { 588 tcg_out_movext1(s, i1); 589 tcg_out_movext2(s, i2, i3, scratch); 590 return; 591 } 592 if (i2->dst != src1 && i2->dst != src3) { 593 tcg_out_movext1(s, i2); 594 tcg_out_movext2(s, i1, i3, scratch); 595 return; 596 } 597 if (i3->dst != src1 && i3->dst != src2) { 598 tcg_out_movext1(s, i3); 599 tcg_out_movext2(s, i1, i2, scratch); 600 return; 601 } 602 603 /* 604 * There is a cycle. Since there are only 3 nodes, the cycle is 605 * either "clockwise" or "anti-clockwise", and can be solved with 606 * a single scratch or two xchg. 607 */ 608 if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) { 609 /* "Clockwise" */ 610 if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) { 611 tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3); 612 /* The data is now in the correct registers, now extend. */ 613 tcg_out_movext1_new_src(s, i1, i1->dst); 614 tcg_out_movext1_new_src(s, i2, i2->dst); 615 tcg_out_movext1_new_src(s, i3, i3->dst); 616 } else { 617 tcg_debug_assert(scratch >= 0); 618 tcg_out_mov(s, i1->src_type, scratch, src1); 619 tcg_out_movext1(s, i3); 620 tcg_out_movext1(s, i2); 621 tcg_out_movext1_new_src(s, i1, scratch); 622 } 623 } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) { 624 /* "Anti-clockwise" */ 625 if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) { 626 tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2); 627 /* The data is now in the correct registers, now extend. 
*/ 628 tcg_out_movext1_new_src(s, i1, i1->dst); 629 tcg_out_movext1_new_src(s, i2, i2->dst); 630 tcg_out_movext1_new_src(s, i3, i3->dst); 631 } else { 632 tcg_debug_assert(scratch >= 0); 633 tcg_out_mov(s, i1->src_type, scratch, src1); 634 tcg_out_movext1(s, i2); 635 tcg_out_movext1(s, i3); 636 tcg_out_movext1_new_src(s, i1, scratch); 637 } 638 } else { 639 g_assert_not_reached(); 640 } 641 } 642 643 /* 644 * Allocate a new TCGLabelQemuLdst entry. 645 */ 646 647 __attribute__((unused)) 648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) 649 { 650 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); 651 652 memset(l, 0, sizeof(*l)); 653 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); 654 655 return l; 656 } 657 658 /* 659 * Allocate new constant pool entries. 660 */ 661 662 typedef struct TCGLabelPoolData { 663 struct TCGLabelPoolData *next; 664 tcg_insn_unit *label; 665 intptr_t addend; 666 int rtype; 667 unsigned nlong; 668 tcg_target_ulong data[]; 669 } TCGLabelPoolData; 670 671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, 672 tcg_insn_unit *label, intptr_t addend) 673 { 674 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) 675 + sizeof(tcg_target_ulong) * nlong); 676 677 n->label = label; 678 n->addend = addend; 679 n->rtype = rtype; 680 n->nlong = nlong; 681 return n; 682 } 683 684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) 685 { 686 TCGLabelPoolData *i, **pp; 687 int nlong = n->nlong; 688 689 /* Insertion sort on the pool. */ 690 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { 691 if (nlong > i->nlong) { 692 break; 693 } 694 if (nlong < i->nlong) { 695 continue; 696 } 697 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { 698 break; 699 } 700 } 701 n->next = *pp; 702 *pp = n; 703 } 704 705 /* The "usual" for generic integer code. */ 706 __attribute__((unused)) 707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, 708 tcg_insn_unit *label, intptr_t addend) 709 { 710 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); 711 n->data[0] = d; 712 new_pool_insert(s, n); 713 } 714 715 /* For v64 or v128, depending on the host. */ 716 __attribute__((unused)) 717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, 718 intptr_t addend, tcg_target_ulong d0, 719 tcg_target_ulong d1) 720 { 721 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); 722 n->data[0] = d0; 723 n->data[1] = d1; 724 new_pool_insert(s, n); 725 } 726 727 /* For v128 or v256, depending on the host. */ 728 __attribute__((unused)) 729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, 730 intptr_t addend, tcg_target_ulong d0, 731 tcg_target_ulong d1, tcg_target_ulong d2, 732 tcg_target_ulong d3) 733 { 734 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); 735 n->data[0] = d0; 736 n->data[1] = d1; 737 n->data[2] = d2; 738 n->data[3] = d3; 739 new_pool_insert(s, n); 740 } 741 742 /* For v256, for 32-bit host. 
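 * Eight tcg_target_ulong entries of 32 bits each supply the full 256-bit
 * vector constant; 64-bit hosts use new_pool_l4 for the same purpose.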
*/ 743 __attribute__((unused)) 744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, 745 intptr_t addend, tcg_target_ulong d0, 746 tcg_target_ulong d1, tcg_target_ulong d2, 747 tcg_target_ulong d3, tcg_target_ulong d4, 748 tcg_target_ulong d5, tcg_target_ulong d6, 749 tcg_target_ulong d7) 750 { 751 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); 752 n->data[0] = d0; 753 n->data[1] = d1; 754 n->data[2] = d2; 755 n->data[3] = d3; 756 n->data[4] = d4; 757 n->data[5] = d5; 758 n->data[6] = d6; 759 n->data[7] = d7; 760 new_pool_insert(s, n); 761 } 762 763 /* 764 * Generate TB finalization at the end of block 765 */ 766 767 static int tcg_out_ldst_finalize(TCGContext *s) 768 { 769 TCGLabelQemuLdst *lb; 770 771 /* qemu_ld/st slow paths */ 772 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { 773 if (lb->is_ld 774 ? !tcg_out_qemu_ld_slow_path(s, lb) 775 : !tcg_out_qemu_st_slow_path(s, lb)) { 776 return -2; 777 } 778 779 /* 780 * Test for (pending) buffer overflow. The assumption is that any 781 * one operation beginning below the high water mark cannot overrun 782 * the buffer completely. Thus we can test for overflow after 783 * generating code without having to check during generation. 784 */ 785 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 786 return -1; 787 } 788 } 789 return 0; 790 } 791 792 static int tcg_out_pool_finalize(TCGContext *s) 793 { 794 TCGLabelPoolData *p = s->pool_labels; 795 TCGLabelPoolData *l = NULL; 796 void *a; 797 798 if (p == NULL) { 799 return 0; 800 } 801 802 /* 803 * ??? Round up to qemu_icache_linesize, but then do not round 804 * again when allocating the next TranslationBlock structure. 805 */ 806 a = (void *)ROUND_UP((uintptr_t)s->code_ptr, 807 sizeof(tcg_target_ulong) * p->nlong); 808 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); 809 s->data_gen_ptr = a; 810 811 for (; p != NULL; p = p->next) { 812 size_t size = sizeof(tcg_target_ulong) * p->nlong; 813 uintptr_t value; 814 815 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { 816 if (unlikely(a > s->code_gen_highwater)) { 817 return -1; 818 } 819 memcpy(a, p->data, size); 820 a += size; 821 l = p; 822 } 823 824 value = (uintptr_t)tcg_splitwx_to_rx(a) - size; 825 if (!patch_reloc(p->label, p->rtype, value, p->addend)) { 826 return -2; 827 } 828 } 829 830 s->code_ptr = a; 831 return 0; 832 } 833 834 #define C_PFX1(P, A) P##A 835 #define C_PFX2(P, A, B) P##A##_##B 836 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 837 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 838 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 839 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 840 841 /* Define an enumeration for the various combinations. 
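 * For example, a backend constraint line C_O1_I2(r, r, ri) expands here
 * to the enumerator name c_o1_i2_r_r_ri via C_PFX3 token pasting.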
*/ 842 843 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1), 844 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2), 845 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3), 846 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4), 847 848 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1), 849 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2), 850 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), 851 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), 852 853 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), 854 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1), 855 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), 856 857 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), 858 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), 859 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), 860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), 861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), 862 863 typedef enum { 864 C_Dynamic = -2, 865 C_NotImplemented = -1, 866 #include "tcg-target-con-set.h" 867 } TCGConstraintSetIndex; 868 869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned); 870 871 #undef C_O0_I1 872 #undef C_O0_I2 873 #undef C_O0_I3 874 #undef C_O0_I4 875 #undef C_O1_I1 876 #undef C_O1_I2 877 #undef C_O1_I3 878 #undef C_O1_I4 879 #undef C_N1_I2 880 #undef C_N1O1_I1 881 #undef C_N2_I1 882 #undef C_O2_I1 883 #undef C_O2_I2 884 #undef C_O2_I3 885 #undef C_O2_I4 886 #undef C_N1_O1_I4 887 888 /* Put all of the constraint sets into an array, indexed by the enum. */ 889 890 typedef struct TCGConstraintSet { 891 uint8_t nb_oargs, nb_iargs; 892 const char *args_ct_str[TCG_MAX_OP_ARGS]; 893 } TCGConstraintSet; 894 895 #define C_O0_I1(I1) { 0, 1, { #I1 } }, 896 #define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } }, 897 #define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } }, 898 #define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } }, 899 900 #define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } }, 901 #define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } }, 902 #define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } }, 903 #define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } }, 904 905 #define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } }, 906 #define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } }, 907 #define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } }, 908 909 #define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } }, 910 #define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } }, 911 #define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } }, 912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } }, 913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, 914 915 static const TCGConstraintSet constraint_sets[] = { 916 #include "tcg-target-con-set.h" 917 }; 918 919 #undef C_O0_I1 920 #undef C_O0_I2 921 #undef C_O0_I3 922 #undef C_O0_I4 923 #undef C_O1_I1 924 #undef C_O1_I2 925 #undef C_O1_I3 926 #undef C_O1_I4 927 #undef C_N1_I2 928 #undef C_N1O1_I1 929 #undef C_N2_I1 930 #undef C_O2_I1 931 #undef C_O2_I2 932 #undef C_O2_I3 933 #undef C_O2_I4 934 #undef C_N1_O1_I4 935 936 /* Expand the enumerator to be returned from tcg_target_op_def(). 
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
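 * (_Generic(V, T: &V.base) has no default association, so the initializer
 * fails to compile unless V really is declared with type T.)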
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.
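     * The copied temps still point into tcg_init_ctx.temps; rewrite each
     * mem_base so that it refers to the corresponding entry in this
     * context's own temps[] array.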
*/ 1086 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 1087 if (tcg_init_ctx.temps[i].mem_base) { 1088 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 1089 tcg_debug_assert(b >= 0 && b < n); 1090 s->temps[i].mem_base = &s->temps[b]; 1091 } 1092 } 1093 1094 /* Claim an entry in tcg_ctxs */ 1095 n = qatomic_fetch_inc(&tcg_cur_ctxs); 1096 g_assert(n < tcg_max_ctxs); 1097 qatomic_set(&tcg_ctxs[n], s); 1098 1099 if (n > 0) { 1100 tcg_region_initial_alloc(s); 1101 } 1102 1103 tcg_ctx = s; 1104 } 1105 #endif /* !CONFIG_USER_ONLY */ 1106 1107 /* pool based memory allocation */ 1108 void *tcg_malloc_internal(TCGContext *s, int size) 1109 { 1110 TCGPool *p; 1111 int pool_size; 1112 1113 if (size > TCG_POOL_CHUNK_SIZE) { 1114 /* big malloc: insert a new pool (XXX: could optimize) */ 1115 p = g_malloc(sizeof(TCGPool) + size); 1116 p->size = size; 1117 p->next = s->pool_first_large; 1118 s->pool_first_large = p; 1119 return p->data; 1120 } else { 1121 p = s->pool_current; 1122 if (!p) { 1123 p = s->pool_first; 1124 if (!p) 1125 goto new_pool; 1126 } else { 1127 if (!p->next) { 1128 new_pool: 1129 pool_size = TCG_POOL_CHUNK_SIZE; 1130 p = g_malloc(sizeof(TCGPool) + pool_size); 1131 p->size = pool_size; 1132 p->next = NULL; 1133 if (s->pool_current) { 1134 s->pool_current->next = p; 1135 } else { 1136 s->pool_first = p; 1137 } 1138 } else { 1139 p = p->next; 1140 } 1141 } 1142 } 1143 s->pool_current = p; 1144 s->pool_cur = p->data + size; 1145 s->pool_end = p->data + p->size; 1146 return p->data; 1147 } 1148 1149 void tcg_pool_reset(TCGContext *s) 1150 { 1151 TCGPool *p, *t; 1152 for (p = s->pool_first_large; p; p = t) { 1153 t = p->next; 1154 g_free(p); 1155 } 1156 s->pool_first_large = NULL; 1157 s->pool_cur = s->pool_end = NULL; 1158 s->pool_current = NULL; 1159 } 1160 1161 /* 1162 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions, 1163 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N. 1164 * We only use these for layout in tcg_out_ld_helper_ret and 1165 * tcg_out_st_helper_args, and share them between several of 1166 * the helpers, with the end result that it's easier to build manually. 
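 *
 * For instance, info_helper_ld32_mmu below covers helper_ldub_mmu,
 * helper_lduw_mmu and helper_ldul_mmu alike, since all three return a
 * tcg_target_ulong and take the same (env, addr, oi, ra) arguments.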
1167 */ 1168 1169 #if TCG_TARGET_REG_BITS == 32 1170 # define dh_typecode_ttl dh_typecode_i32 1171 #else 1172 # define dh_typecode_ttl dh_typecode_i64 1173 #endif 1174 1175 static TCGHelperInfo info_helper_ld32_mmu = { 1176 .flags = TCG_CALL_NO_WG, 1177 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1178 | dh_typemask(env, 1) 1179 | dh_typemask(i64, 2) /* uint64_t addr */ 1180 | dh_typemask(i32, 3) /* unsigned oi */ 1181 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1182 }; 1183 1184 static TCGHelperInfo info_helper_ld64_mmu = { 1185 .flags = TCG_CALL_NO_WG, 1186 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1187 | dh_typemask(env, 1) 1188 | dh_typemask(i64, 2) /* uint64_t addr */ 1189 | dh_typemask(i32, 3) /* unsigned oi */ 1190 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1191 }; 1192 1193 static TCGHelperInfo info_helper_ld128_mmu = { 1194 .flags = TCG_CALL_NO_WG, 1195 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1196 | dh_typemask(env, 1) 1197 | dh_typemask(i64, 2) /* uint64_t addr */ 1198 | dh_typemask(i32, 3) /* unsigned oi */ 1199 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1200 }; 1201 1202 static TCGHelperInfo info_helper_st32_mmu = { 1203 .flags = TCG_CALL_NO_WG, 1204 .typemask = dh_typemask(void, 0) 1205 | dh_typemask(env, 1) 1206 | dh_typemask(i64, 2) /* uint64_t addr */ 1207 | dh_typemask(i32, 3) /* uint32_t data */ 1208 | dh_typemask(i32, 4) /* unsigned oi */ 1209 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1210 }; 1211 1212 static TCGHelperInfo info_helper_st64_mmu = { 1213 .flags = TCG_CALL_NO_WG, 1214 .typemask = dh_typemask(void, 0) 1215 | dh_typemask(env, 1) 1216 | dh_typemask(i64, 2) /* uint64_t addr */ 1217 | dh_typemask(i64, 3) /* uint64_t data */ 1218 | dh_typemask(i32, 4) /* unsigned oi */ 1219 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1220 }; 1221 1222 static TCGHelperInfo info_helper_st128_mmu = { 1223 .flags = TCG_CALL_NO_WG, 1224 .typemask = dh_typemask(void, 0) 1225 | dh_typemask(env, 1) 1226 | dh_typemask(i64, 2) /* uint64_t addr */ 1227 | dh_typemask(i128, 3) /* Int128 data */ 1228 | dh_typemask(i32, 4) /* unsigned oi */ 1229 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1230 }; 1231 1232 #ifdef CONFIG_TCG_INTERPRETER 1233 static ffi_type *typecode_to_ffi(int argmask) 1234 { 1235 /* 1236 * libffi does not support __int128_t, so we have forced Int128 1237 * to use the structure definition instead of the builtin type. 1238 */ 1239 static ffi_type *ffi_type_i128_elements[3] = { 1240 &ffi_type_uint64, 1241 &ffi_type_uint64, 1242 NULL 1243 }; 1244 static ffi_type ffi_type_i128 = { 1245 .size = 16, 1246 .alignment = __alignof__(Int128), 1247 .type = FFI_TYPE_STRUCT, 1248 .elements = ffi_type_i128_elements, 1249 }; 1250 1251 switch (argmask) { 1252 case dh_typecode_void: 1253 return &ffi_type_void; 1254 case dh_typecode_i32: 1255 return &ffi_type_uint32; 1256 case dh_typecode_s32: 1257 return &ffi_type_sint32; 1258 case dh_typecode_i64: 1259 return &ffi_type_uint64; 1260 case dh_typecode_s64: 1261 return &ffi_type_sint64; 1262 case dh_typecode_ptr: 1263 return &ffi_type_pointer; 1264 case dh_typecode_i128: 1265 return &ffi_type_i128; 1266 } 1267 g_assert_not_reached(); 1268 } 1269 1270 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1271 { 1272 unsigned typemask = info->typemask; 1273 struct { 1274 ffi_cif cif; 1275 ffi_type *args[]; 1276 } *ca; 1277 ffi_status status; 1278 int nargs; 1279 1280 /* Ignoring the return type, find the last non-zero field. 
*/ 1281 nargs = 32 - clz32(typemask >> 3); 1282 nargs = DIV_ROUND_UP(nargs, 3); 1283 assert(nargs <= MAX_CALL_IARGS); 1284 1285 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 1286 ca->cif.rtype = typecode_to_ffi(typemask & 7); 1287 ca->cif.nargs = nargs; 1288 1289 if (nargs != 0) { 1290 ca->cif.arg_types = ca->args; 1291 for (int j = 0; j < nargs; ++j) { 1292 int typecode = extract32(typemask, (j + 1) * 3, 3); 1293 ca->args[j] = typecode_to_ffi(typecode); 1294 } 1295 } 1296 1297 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 1298 ca->cif.rtype, ca->cif.arg_types); 1299 assert(status == FFI_OK); 1300 1301 return &ca->cif; 1302 } 1303 1304 #define HELPER_INFO_INIT(I) (&(I)->cif) 1305 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I) 1306 #else 1307 #define HELPER_INFO_INIT(I) (&(I)->init) 1308 #define HELPER_INFO_INIT_VAL(I) 1 1309 #endif /* CONFIG_TCG_INTERPRETER */ 1310 1311 static inline bool arg_slot_reg_p(unsigned arg_slot) 1312 { 1313 /* 1314 * Split the sizeof away from the comparison to avoid Werror from 1315 * "unsigned < 0 is always false", when iarg_regs is empty. 1316 */ 1317 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs); 1318 return arg_slot < nreg; 1319 } 1320 1321 static inline int arg_slot_stk_ofs(unsigned arg_slot) 1322 { 1323 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1324 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 1325 1326 tcg_debug_assert(stk_slot < max); 1327 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long); 1328 } 1329 1330 typedef struct TCGCumulativeArgs { 1331 int arg_idx; /* tcg_gen_callN args[] */ 1332 int info_in_idx; /* TCGHelperInfo in[] */ 1333 int arg_slot; /* regs+stack slot */ 1334 int ref_slot; /* stack slots for references */ 1335 } TCGCumulativeArgs; 1336 1337 static void layout_arg_even(TCGCumulativeArgs *cum) 1338 { 1339 cum->arg_slot += cum->arg_slot & 1; 1340 } 1341 1342 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 1343 TCGCallArgumentKind kind) 1344 { 1345 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1346 1347 *loc = (TCGCallArgumentLoc){ 1348 .kind = kind, 1349 .arg_idx = cum->arg_idx, 1350 .arg_slot = cum->arg_slot, 1351 }; 1352 cum->info_in_idx++; 1353 cum->arg_slot++; 1354 } 1355 1356 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 1357 TCGHelperInfo *info, int n) 1358 { 1359 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1360 1361 for (int i = 0; i < n; ++i) { 1362 /* Layout all using the same arg_idx, adjusting the subindex. */ 1363 loc[i] = (TCGCallArgumentLoc){ 1364 .kind = TCG_CALL_ARG_NORMAL, 1365 .arg_idx = cum->arg_idx, 1366 .tmp_subindex = i, 1367 .arg_slot = cum->arg_slot + i, 1368 }; 1369 } 1370 cum->info_in_idx += n; 1371 cum->arg_slot += n; 1372 } 1373 1374 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 1375 { 1376 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1377 int n = 128 / TCG_TARGET_REG_BITS; 1378 1379 /* The first subindex carries the pointer. */ 1380 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 1381 1382 /* 1383 * The callee is allowed to clobber memory associated with 1384 * structure pass by-reference. Therefore we must make copies. 1385 * Allocate space from "ref_slot", which will be adjusted to 1386 * follow the parameters on the stack. 1387 */ 1388 loc[0].ref_slot = cum->ref_slot; 1389 1390 /* 1391 * Subsequent words also go into the reference slot, but 1392 * do not accumulate into the regular arguments. 
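     * With 64-bit registers n is 128 / 64 = 2, i.e. one pointer slot in the
     * regular argument area plus two copy slots in the "ref" area.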
1393 */ 1394 for (int i = 1; i < n; ++i) { 1395 loc[i] = (TCGCallArgumentLoc){ 1396 .kind = TCG_CALL_ARG_BY_REF_N, 1397 .arg_idx = cum->arg_idx, 1398 .tmp_subindex = i, 1399 .ref_slot = cum->ref_slot + i, 1400 }; 1401 } 1402 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1403 cum->ref_slot += n; 1404 } 1405 1406 static void init_call_layout(TCGHelperInfo *info) 1407 { 1408 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1409 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1410 unsigned typemask = info->typemask; 1411 unsigned typecode; 1412 TCGCumulativeArgs cum = { }; 1413 1414 /* 1415 * Parse and place any function return value. 1416 */ 1417 typecode = typemask & 7; 1418 switch (typecode) { 1419 case dh_typecode_void: 1420 info->nr_out = 0; 1421 break; 1422 case dh_typecode_i32: 1423 case dh_typecode_s32: 1424 case dh_typecode_ptr: 1425 info->nr_out = 1; 1426 info->out_kind = TCG_CALL_RET_NORMAL; 1427 break; 1428 case dh_typecode_i64: 1429 case dh_typecode_s64: 1430 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1431 info->out_kind = TCG_CALL_RET_NORMAL; 1432 /* Query the last register now to trigger any assert early. */ 1433 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1434 break; 1435 case dh_typecode_i128: 1436 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1437 info->out_kind = TCG_TARGET_CALL_RET_I128; 1438 switch (TCG_TARGET_CALL_RET_I128) { 1439 case TCG_CALL_RET_NORMAL: 1440 /* Query the last register now to trigger any assert early. */ 1441 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1442 break; 1443 case TCG_CALL_RET_BY_VEC: 1444 /* Query the single register now to trigger any assert early. */ 1445 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1446 break; 1447 case TCG_CALL_RET_BY_REF: 1448 /* 1449 * Allocate the first argument to the output. 1450 * We don't need to store this anywhere, just make it 1451 * unavailable for use in the input loop below. 1452 */ 1453 cum.arg_slot = 1; 1454 break; 1455 default: 1456 qemu_build_not_reached(); 1457 } 1458 break; 1459 default: 1460 g_assert_not_reached(); 1461 } 1462 1463 /* 1464 * Parse and place function arguments. 
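     *
     * For example, with TCG_CALL_ARG_NORMAL throughout on a 64-bit host,
     * helper_ldq_mmu(env, addr, oi, ra) occupies four consecutive argument
     * slots, 0 through 3 (registers first, then stack).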
1465 */ 1466 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1467 TCGCallArgumentKind kind; 1468 TCGType type; 1469 1470 typecode = typemask & 7; 1471 switch (typecode) { 1472 case dh_typecode_i32: 1473 case dh_typecode_s32: 1474 type = TCG_TYPE_I32; 1475 break; 1476 case dh_typecode_i64: 1477 case dh_typecode_s64: 1478 type = TCG_TYPE_I64; 1479 break; 1480 case dh_typecode_ptr: 1481 type = TCG_TYPE_PTR; 1482 break; 1483 case dh_typecode_i128: 1484 type = TCG_TYPE_I128; 1485 break; 1486 default: 1487 g_assert_not_reached(); 1488 } 1489 1490 switch (type) { 1491 case TCG_TYPE_I32: 1492 switch (TCG_TARGET_CALL_ARG_I32) { 1493 case TCG_CALL_ARG_EVEN: 1494 layout_arg_even(&cum); 1495 /* fall through */ 1496 case TCG_CALL_ARG_NORMAL: 1497 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1498 break; 1499 case TCG_CALL_ARG_EXTEND: 1500 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1501 layout_arg_1(&cum, info, kind); 1502 break; 1503 default: 1504 qemu_build_not_reached(); 1505 } 1506 break; 1507 1508 case TCG_TYPE_I64: 1509 switch (TCG_TARGET_CALL_ARG_I64) { 1510 case TCG_CALL_ARG_EVEN: 1511 layout_arg_even(&cum); 1512 /* fall through */ 1513 case TCG_CALL_ARG_NORMAL: 1514 if (TCG_TARGET_REG_BITS == 32) { 1515 layout_arg_normal_n(&cum, info, 2); 1516 } else { 1517 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1518 } 1519 break; 1520 default: 1521 qemu_build_not_reached(); 1522 } 1523 break; 1524 1525 case TCG_TYPE_I128: 1526 switch (TCG_TARGET_CALL_ARG_I128) { 1527 case TCG_CALL_ARG_EVEN: 1528 layout_arg_even(&cum); 1529 /* fall through */ 1530 case TCG_CALL_ARG_NORMAL: 1531 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1532 break; 1533 case TCG_CALL_ARG_BY_REF: 1534 layout_arg_by_ref(&cum, info); 1535 break; 1536 default: 1537 qemu_build_not_reached(); 1538 } 1539 break; 1540 1541 default: 1542 g_assert_not_reached(); 1543 } 1544 } 1545 info->nr_in = cum.info_in_idx; 1546 1547 /* Validate that we didn't overrun the input array. */ 1548 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1549 /* Validate the backend has enough argument space. */ 1550 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1551 1552 /* 1553 * Relocate the "ref_slot" area to the end of the parameters. 1554 * Minimizing this stack offset helps code size for x86, 1555 * which has a signed 8-bit offset encoding. 
1556 */ 1557 if (cum.ref_slot != 0) { 1558 int ref_base = 0; 1559 1560 if (cum.arg_slot > max_reg_slots) { 1561 int align = __alignof(Int128) / sizeof(tcg_target_long); 1562 1563 ref_base = cum.arg_slot - max_reg_slots; 1564 if (align > 1) { 1565 ref_base = ROUND_UP(ref_base, align); 1566 } 1567 } 1568 assert(ref_base + cum.ref_slot <= max_stk_slots); 1569 ref_base += max_reg_slots; 1570 1571 if (ref_base != 0) { 1572 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1573 TCGCallArgumentLoc *loc = &info->in[i]; 1574 switch (loc->kind) { 1575 case TCG_CALL_ARG_BY_REF: 1576 case TCG_CALL_ARG_BY_REF_N: 1577 loc->ref_slot += ref_base; 1578 break; 1579 default: 1580 break; 1581 } 1582 } 1583 } 1584 } 1585 } 1586 1587 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1588 static void process_constraint_sets(void); 1589 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1590 TCGReg reg, const char *name); 1591 1592 static void tcg_context_init(unsigned max_threads) 1593 { 1594 TCGContext *s = &tcg_init_ctx; 1595 int n, i; 1596 TCGTemp *ts; 1597 1598 memset(s, 0, sizeof(*s)); 1599 s->nb_globals = 0; 1600 1601 init_call_layout(&info_helper_ld32_mmu); 1602 init_call_layout(&info_helper_ld64_mmu); 1603 init_call_layout(&info_helper_ld128_mmu); 1604 init_call_layout(&info_helper_st32_mmu); 1605 init_call_layout(&info_helper_st64_mmu); 1606 init_call_layout(&info_helper_st128_mmu); 1607 1608 tcg_target_init(s); 1609 process_constraint_sets(); 1610 1611 /* Reverse the order of the saved registers, assuming they're all at 1612 the start of tcg_target_reg_alloc_order. */ 1613 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1614 int r = tcg_target_reg_alloc_order[n]; 1615 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1616 break; 1617 } 1618 } 1619 for (i = 0; i < n; ++i) { 1620 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1621 } 1622 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1623 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1624 } 1625 1626 tcg_ctx = s; 1627 /* 1628 * In user-mode we simply share the init context among threads, since we 1629 * use a single region. See the documentation tcg_region_init() for the 1630 * reasoning behind this. 1631 * In system-mode we will have at most max_threads TCG threads. 1632 */ 1633 #ifdef CONFIG_USER_ONLY 1634 tcg_ctxs = &tcg_ctx; 1635 tcg_cur_ctxs = 1; 1636 tcg_max_ctxs = 1; 1637 #else 1638 tcg_max_ctxs = max_threads; 1639 tcg_ctxs = g_new0(TCGContext *, max_threads); 1640 #endif 1641 1642 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1643 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1644 tcg_env = temp_tcgv_ptr(ts); 1645 } 1646 1647 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1648 { 1649 tcg_context_init(max_threads); 1650 tcg_region_init(tb_size, splitwx, max_threads); 1651 } 1652 1653 /* 1654 * Allocate TBs right before their corresponding translated code, making 1655 * sure that TBs and code are on different cache lines. 
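 * Both the TB and the code pointer that follows it are rounded up to
 * qemu_icache_linesize, and a new region is requested only when that
 * pair would cross code_gen_highwater.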
1656 */ 1657 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1658 { 1659 uintptr_t align = qemu_icache_linesize; 1660 TranslationBlock *tb; 1661 void *next; 1662 1663 retry: 1664 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1665 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1666 1667 if (unlikely(next > s->code_gen_highwater)) { 1668 if (tcg_region_alloc(s)) { 1669 return NULL; 1670 } 1671 goto retry; 1672 } 1673 qatomic_set(&s->code_gen_ptr, next); 1674 return tb; 1675 } 1676 1677 void tcg_prologue_init(void) 1678 { 1679 TCGContext *s = tcg_ctx; 1680 size_t prologue_size; 1681 1682 s->code_ptr = s->code_gen_ptr; 1683 s->code_buf = s->code_gen_ptr; 1684 s->data_gen_ptr = NULL; 1685 1686 #ifndef CONFIG_TCG_INTERPRETER 1687 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1688 #endif 1689 1690 s->pool_labels = NULL; 1691 1692 qemu_thread_jit_write(); 1693 /* Generate the prologue. */ 1694 tcg_target_qemu_prologue(s); 1695 1696 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1697 { 1698 int result = tcg_out_pool_finalize(s); 1699 tcg_debug_assert(result == 0); 1700 } 1701 1702 prologue_size = tcg_current_code_size(s); 1703 perf_report_prologue(s->code_gen_ptr, prologue_size); 1704 1705 #ifndef CONFIG_TCG_INTERPRETER 1706 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1707 (uintptr_t)s->code_buf, prologue_size); 1708 #endif 1709 1710 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1711 FILE *logfile = qemu_log_trylock(); 1712 if (logfile) { 1713 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1714 if (s->data_gen_ptr) { 1715 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1716 size_t data_size = prologue_size - code_size; 1717 size_t i; 1718 1719 disas(logfile, s->code_gen_ptr, code_size); 1720 1721 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1722 if (sizeof(tcg_target_ulong) == 8) { 1723 fprintf(logfile, 1724 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1725 (uintptr_t)s->data_gen_ptr + i, 1726 *(uint64_t *)(s->data_gen_ptr + i)); 1727 } else { 1728 fprintf(logfile, 1729 "0x%08" PRIxPTR ": .long 0x%08x\n", 1730 (uintptr_t)s->data_gen_ptr + i, 1731 *(uint32_t *)(s->data_gen_ptr + i)); 1732 } 1733 } 1734 } else { 1735 disas(logfile, s->code_gen_ptr, prologue_size); 1736 } 1737 fprintf(logfile, "\n"); 1738 qemu_log_unlock(logfile); 1739 } 1740 } 1741 1742 #ifndef CONFIG_TCG_INTERPRETER 1743 /* 1744 * Assert that goto_ptr is implemented completely, setting an epilogue. 1745 * For tci, we use NULL as the signal to return from the interpreter, 1746 * so skip this check. 1747 */ 1748 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1749 #endif 1750 1751 tcg_region_prologue_set(s); 1752 } 1753 1754 void tcg_func_start(TCGContext *s) 1755 { 1756 tcg_pool_reset(s); 1757 s->nb_temps = s->nb_globals; 1758 1759 /* No temps have been previously allocated for size or locality. */ 1760 tcg_temp_ebb_reset_freed(s); 1761 1762 /* No constant temps have been previously allocated. 
*/ 1763 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1764 if (s->const_table[i]) { 1765 g_hash_table_remove_all(s->const_table[i]); 1766 } 1767 } 1768 1769 s->nb_ops = 0; 1770 s->nb_labels = 0; 1771 s->current_frame_offset = s->frame_start; 1772 1773 #ifdef CONFIG_DEBUG_TCG 1774 s->goto_tb_issue_mask = 0; 1775 #endif 1776 1777 QTAILQ_INIT(&s->ops); 1778 QTAILQ_INIT(&s->free_ops); 1779 s->emit_before_op = NULL; 1780 QSIMPLEQ_INIT(&s->labels); 1781 1782 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1783 tcg_debug_assert(s->insn_start_words > 0); 1784 } 1785 1786 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1787 { 1788 int n = s->nb_temps++; 1789 1790 if (n >= TCG_MAX_TEMPS) { 1791 tcg_raise_tb_overflow(s); 1792 } 1793 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1794 } 1795 1796 static TCGTemp *tcg_global_alloc(TCGContext *s) 1797 { 1798 TCGTemp *ts; 1799 1800 tcg_debug_assert(s->nb_globals == s->nb_temps); 1801 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1802 s->nb_globals++; 1803 ts = tcg_temp_alloc(s); 1804 ts->kind = TEMP_GLOBAL; 1805 1806 return ts; 1807 } 1808 1809 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1810 TCGReg reg, const char *name) 1811 { 1812 TCGTemp *ts; 1813 1814 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1815 1816 ts = tcg_global_alloc(s); 1817 ts->base_type = type; 1818 ts->type = type; 1819 ts->kind = TEMP_FIXED; 1820 ts->reg = reg; 1821 ts->name = name; 1822 tcg_regset_set_reg(s->reserved_regs, reg); 1823 1824 return ts; 1825 } 1826 1827 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1828 { 1829 s->frame_start = start; 1830 s->frame_end = start + size; 1831 s->frame_temp 1832 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1833 } 1834 1835 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1836 const char *name, TCGType type) 1837 { 1838 TCGContext *s = tcg_ctx; 1839 TCGTemp *base_ts = tcgv_ptr_temp(base); 1840 TCGTemp *ts = tcg_global_alloc(s); 1841 int indirect_reg = 0; 1842 1843 switch (base_ts->kind) { 1844 case TEMP_FIXED: 1845 break; 1846 case TEMP_GLOBAL: 1847 /* We do not support double-indirect registers. */ 1848 tcg_debug_assert(!base_ts->indirect_reg); 1849 base_ts->indirect_base = 1; 1850 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1851 ? 
2 : 1); 1852 indirect_reg = 1; 1853 break; 1854 default: 1855 g_assert_not_reached(); 1856 } 1857 1858 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1859 TCGTemp *ts2 = tcg_global_alloc(s); 1860 char buf[64]; 1861 1862 ts->base_type = TCG_TYPE_I64; 1863 ts->type = TCG_TYPE_I32; 1864 ts->indirect_reg = indirect_reg; 1865 ts->mem_allocated = 1; 1866 ts->mem_base = base_ts; 1867 ts->mem_offset = offset; 1868 pstrcpy(buf, sizeof(buf), name); 1869 pstrcat(buf, sizeof(buf), "_0"); 1870 ts->name = strdup(buf); 1871 1872 tcg_debug_assert(ts2 == ts + 1); 1873 ts2->base_type = TCG_TYPE_I64; 1874 ts2->type = TCG_TYPE_I32; 1875 ts2->indirect_reg = indirect_reg; 1876 ts2->mem_allocated = 1; 1877 ts2->mem_base = base_ts; 1878 ts2->mem_offset = offset + 4; 1879 ts2->temp_subindex = 1; 1880 pstrcpy(buf, sizeof(buf), name); 1881 pstrcat(buf, sizeof(buf), "_1"); 1882 ts2->name = strdup(buf); 1883 } else { 1884 ts->base_type = type; 1885 ts->type = type; 1886 ts->indirect_reg = indirect_reg; 1887 ts->mem_allocated = 1; 1888 ts->mem_base = base_ts; 1889 ts->mem_offset = offset; 1890 ts->name = name; 1891 } 1892 return ts; 1893 } 1894 1895 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 1896 { 1897 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 1898 return temp_tcgv_i32(ts); 1899 } 1900 1901 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 1902 { 1903 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 1904 return temp_tcgv_i64(ts); 1905 } 1906 1907 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 1908 { 1909 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 1910 return temp_tcgv_ptr(ts); 1911 } 1912 1913 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 1914 { 1915 TCGContext *s = tcg_ctx; 1916 TCGTemp *ts; 1917 int n; 1918 1919 if (kind == TEMP_EBB) { 1920 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 1921 1922 if (idx < TCG_MAX_TEMPS) { 1923 /* There is already an available temp with the right type. 
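* Reuse it rather than growing s->temps: only freed TEMP_EBB temps are
* recorded in the bitmap, and the bitmap is indexed by base type, so
* the asserts below are guaranteed to hold.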
*/ 1924 clear_bit(idx, s->free_temps[type].l); 1925 1926 ts = &s->temps[idx]; 1927 ts->temp_allocated = 1; 1928 tcg_debug_assert(ts->base_type == type); 1929 tcg_debug_assert(ts->kind == kind); 1930 return ts; 1931 } 1932 } else { 1933 tcg_debug_assert(kind == TEMP_TB); 1934 } 1935 1936 switch (type) { 1937 case TCG_TYPE_I32: 1938 case TCG_TYPE_V64: 1939 case TCG_TYPE_V128: 1940 case TCG_TYPE_V256: 1941 n = 1; 1942 break; 1943 case TCG_TYPE_I64: 1944 n = 64 / TCG_TARGET_REG_BITS; 1945 break; 1946 case TCG_TYPE_I128: 1947 n = 128 / TCG_TARGET_REG_BITS; 1948 break; 1949 default: 1950 g_assert_not_reached(); 1951 } 1952 1953 ts = tcg_temp_alloc(s); 1954 ts->base_type = type; 1955 ts->temp_allocated = 1; 1956 ts->kind = kind; 1957 1958 if (n == 1) { 1959 ts->type = type; 1960 } else { 1961 ts->type = TCG_TYPE_REG; 1962 1963 for (int i = 1; i < n; ++i) { 1964 TCGTemp *ts2 = tcg_temp_alloc(s); 1965 1966 tcg_debug_assert(ts2 == ts + i); 1967 ts2->base_type = type; 1968 ts2->type = TCG_TYPE_REG; 1969 ts2->temp_allocated = 1; 1970 ts2->temp_subindex = i; 1971 ts2->kind = kind; 1972 } 1973 } 1974 return ts; 1975 } 1976 1977 TCGv_i32 tcg_temp_new_i32(void) 1978 { 1979 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 1980 } 1981 1982 TCGv_i32 tcg_temp_ebb_new_i32(void) 1983 { 1984 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 1985 } 1986 1987 TCGv_i64 tcg_temp_new_i64(void) 1988 { 1989 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 1990 } 1991 1992 TCGv_i64 tcg_temp_ebb_new_i64(void) 1993 { 1994 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 1995 } 1996 1997 TCGv_ptr tcg_temp_new_ptr(void) 1998 { 1999 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2000 } 2001 2002 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2003 { 2004 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2005 } 2006 2007 TCGv_i128 tcg_temp_new_i128(void) 2008 { 2009 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2010 } 2011 2012 TCGv_i128 tcg_temp_ebb_new_i128(void) 2013 { 2014 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2015 } 2016 2017 TCGv_vec tcg_temp_new_vec(TCGType type) 2018 { 2019 TCGTemp *t; 2020 2021 #ifdef CONFIG_DEBUG_TCG 2022 switch (type) { 2023 case TCG_TYPE_V64: 2024 assert(TCG_TARGET_HAS_v64); 2025 break; 2026 case TCG_TYPE_V128: 2027 assert(TCG_TARGET_HAS_v128); 2028 break; 2029 case TCG_TYPE_V256: 2030 assert(TCG_TARGET_HAS_v256); 2031 break; 2032 default: 2033 g_assert_not_reached(); 2034 } 2035 #endif 2036 2037 t = tcg_temp_new_internal(type, TEMP_EBB); 2038 return temp_tcgv_vec(t); 2039 } 2040 2041 /* Create a new temp of the same type as an existing temp. */ 2042 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2043 { 2044 TCGTemp *t = tcgv_vec_temp(match); 2045 2046 tcg_debug_assert(t->temp_allocated != 0); 2047 2048 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2049 return temp_tcgv_vec(t); 2050 } 2051 2052 void tcg_temp_free_internal(TCGTemp *ts) 2053 { 2054 TCGContext *s = tcg_ctx; 2055 2056 switch (ts->kind) { 2057 case TEMP_CONST: 2058 case TEMP_TB: 2059 /* Silently ignore free. */ 2060 break; 2061 case TEMP_EBB: 2062 tcg_debug_assert(ts->temp_allocated != 0); 2063 ts->temp_allocated = 0; 2064 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2065 break; 2066 default: 2067 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
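* Those persist for the lifetime of the TCGContext, so a request to
* free one indicates a bug in the caller.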
*/ 2068 g_assert_not_reached(); 2069 } 2070 } 2071 2072 void tcg_temp_free_i32(TCGv_i32 arg) 2073 { 2074 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2075 } 2076 2077 void tcg_temp_free_i64(TCGv_i64 arg) 2078 { 2079 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2080 } 2081 2082 void tcg_temp_free_i128(TCGv_i128 arg) 2083 { 2084 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2085 } 2086 2087 void tcg_temp_free_ptr(TCGv_ptr arg) 2088 { 2089 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2090 } 2091 2092 void tcg_temp_free_vec(TCGv_vec arg) 2093 { 2094 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2095 } 2096 2097 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2098 { 2099 TCGContext *s = tcg_ctx; 2100 GHashTable *h = s->const_table[type]; 2101 TCGTemp *ts; 2102 2103 if (h == NULL) { 2104 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2105 s->const_table[type] = h; 2106 } 2107 2108 ts = g_hash_table_lookup(h, &val); 2109 if (ts == NULL) { 2110 int64_t *val_ptr; 2111 2112 ts = tcg_temp_alloc(s); 2113 2114 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2115 TCGTemp *ts2 = tcg_temp_alloc(s); 2116 2117 tcg_debug_assert(ts2 == ts + 1); 2118 2119 ts->base_type = TCG_TYPE_I64; 2120 ts->type = TCG_TYPE_I32; 2121 ts->kind = TEMP_CONST; 2122 ts->temp_allocated = 1; 2123 2124 ts2->base_type = TCG_TYPE_I64; 2125 ts2->type = TCG_TYPE_I32; 2126 ts2->kind = TEMP_CONST; 2127 ts2->temp_allocated = 1; 2128 ts2->temp_subindex = 1; 2129 2130 /* 2131 * Retain the full value of the 64-bit constant in the low 2132 * part, so that the hash table works. Actual uses will 2133 * truncate the value to the low part. 2134 */ 2135 ts[HOST_BIG_ENDIAN].val = val; 2136 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2137 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2138 } else { 2139 ts->base_type = type; 2140 ts->type = type; 2141 ts->kind = TEMP_CONST; 2142 ts->temp_allocated = 1; 2143 ts->val = val; 2144 val_ptr = &ts->val; 2145 } 2146 g_hash_table_insert(h, val_ptr, ts); 2147 } 2148 2149 return ts; 2150 } 2151 2152 TCGv_i32 tcg_constant_i32(int32_t val) 2153 { 2154 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2155 } 2156 2157 TCGv_i64 tcg_constant_i64(int64_t val) 2158 { 2159 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2160 } 2161 2162 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2163 { 2164 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2165 } 2166 2167 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2168 { 2169 val = dup_const(vece, val); 2170 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2171 } 2172 2173 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2174 { 2175 TCGTemp *t = tcgv_vec_temp(match); 2176 2177 tcg_debug_assert(t->temp_allocated != 0); 2178 return tcg_constant_vec(t->base_type, vece, val); 2179 } 2180 2181 #ifdef CONFIG_DEBUG_TCG 2182 size_t temp_idx(TCGTemp *ts) 2183 { 2184 ptrdiff_t n = ts - tcg_ctx->temps; 2185 assert(n >= 0 && n < tcg_ctx->nb_temps); 2186 return n; 2187 } 2188 2189 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2190 { 2191 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2192 2193 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2194 assert(o % sizeof(TCGTemp) == 0); 2195 2196 return (void *)tcg_ctx + (uintptr_t)v; 2197 } 2198 #endif /* CONFIG_DEBUG_TCG */ 2199 2200 /* 2201 * Return true if OP may appear in the opcode stream with TYPE. 2202 * Test the runtime variable that controls each opcode. 
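* For example, an expander might check
*   tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)
* and emit a helper call instead when this returns false.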
2203 */ 2204 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2205 { 2206 bool has_type; 2207 2208 switch (type) { 2209 case TCG_TYPE_I32: 2210 has_type = true; 2211 break; 2212 case TCG_TYPE_I64: 2213 has_type = TCG_TARGET_REG_BITS == 64; 2214 break; 2215 case TCG_TYPE_V64: 2216 has_type = TCG_TARGET_HAS_v64; 2217 break; 2218 case TCG_TYPE_V128: 2219 has_type = TCG_TARGET_HAS_v128; 2220 break; 2221 case TCG_TYPE_V256: 2222 has_type = TCG_TARGET_HAS_v256; 2223 break; 2224 default: 2225 has_type = false; 2226 break; 2227 } 2228 2229 switch (op) { 2230 case INDEX_op_discard: 2231 case INDEX_op_set_label: 2232 case INDEX_op_call: 2233 case INDEX_op_br: 2234 case INDEX_op_mb: 2235 case INDEX_op_insn_start: 2236 case INDEX_op_exit_tb: 2237 case INDEX_op_goto_tb: 2238 case INDEX_op_goto_ptr: 2239 case INDEX_op_qemu_ld_i32: 2240 case INDEX_op_qemu_st_i32: 2241 case INDEX_op_qemu_ld_i64: 2242 case INDEX_op_qemu_st_i64: 2243 return true; 2244 2245 case INDEX_op_qemu_st8_i32: 2246 return TCG_TARGET_HAS_qemu_st8_i32; 2247 2248 case INDEX_op_qemu_ld_i128: 2249 case INDEX_op_qemu_st_i128: 2250 return TCG_TARGET_HAS_qemu_ldst_i128; 2251 2252 case INDEX_op_add: 2253 case INDEX_op_and: 2254 case INDEX_op_mov: 2255 case INDEX_op_or: 2256 case INDEX_op_xor: 2257 return has_type; 2258 2259 case INDEX_op_setcond_i32: 2260 case INDEX_op_brcond_i32: 2261 case INDEX_op_movcond_i32: 2262 case INDEX_op_ld8u_i32: 2263 case INDEX_op_ld8s_i32: 2264 case INDEX_op_ld16u_i32: 2265 case INDEX_op_ld16s_i32: 2266 case INDEX_op_ld_i32: 2267 case INDEX_op_st8_i32: 2268 case INDEX_op_st16_i32: 2269 case INDEX_op_st_i32: 2270 case INDEX_op_extract_i32: 2271 case INDEX_op_sextract_i32: 2272 case INDEX_op_deposit_i32: 2273 return true; 2274 2275 case INDEX_op_negsetcond_i32: 2276 return TCG_TARGET_HAS_negsetcond_i32; 2277 case INDEX_op_extract2_i32: 2278 return TCG_TARGET_HAS_extract2_i32; 2279 case INDEX_op_add2_i32: 2280 return TCG_TARGET_HAS_add2_i32; 2281 case INDEX_op_sub2_i32: 2282 return TCG_TARGET_HAS_sub2_i32; 2283 case INDEX_op_mulu2_i32: 2284 return TCG_TARGET_HAS_mulu2_i32; 2285 case INDEX_op_muls2_i32: 2286 return TCG_TARGET_HAS_muls2_i32; 2287 case INDEX_op_bswap16_i32: 2288 return TCG_TARGET_HAS_bswap16_i32; 2289 case INDEX_op_bswap32_i32: 2290 return TCG_TARGET_HAS_bswap32_i32; 2291 case INDEX_op_clz_i32: 2292 return TCG_TARGET_HAS_clz_i32; 2293 case INDEX_op_ctz_i32: 2294 return TCG_TARGET_HAS_ctz_i32; 2295 case INDEX_op_ctpop_i32: 2296 return TCG_TARGET_HAS_ctpop_i32; 2297 2298 case INDEX_op_brcond2_i32: 2299 case INDEX_op_setcond2_i32: 2300 return TCG_TARGET_REG_BITS == 32; 2301 2302 case INDEX_op_setcond_i64: 2303 case INDEX_op_brcond_i64: 2304 case INDEX_op_movcond_i64: 2305 case INDEX_op_ld8u_i64: 2306 case INDEX_op_ld8s_i64: 2307 case INDEX_op_ld16u_i64: 2308 case INDEX_op_ld16s_i64: 2309 case INDEX_op_ld32u_i64: 2310 case INDEX_op_ld32s_i64: 2311 case INDEX_op_ld_i64: 2312 case INDEX_op_st8_i64: 2313 case INDEX_op_st16_i64: 2314 case INDEX_op_st32_i64: 2315 case INDEX_op_st_i64: 2316 case INDEX_op_ext_i32_i64: 2317 case INDEX_op_extu_i32_i64: 2318 case INDEX_op_extract_i64: 2319 case INDEX_op_sextract_i64: 2320 case INDEX_op_deposit_i64: 2321 return TCG_TARGET_REG_BITS == 64; 2322 2323 case INDEX_op_negsetcond_i64: 2324 return TCG_TARGET_HAS_negsetcond_i64; 2325 case INDEX_op_extract2_i64: 2326 return TCG_TARGET_HAS_extract2_i64; 2327 case INDEX_op_extrl_i64_i32: 2328 case INDEX_op_extrh_i64_i32: 2329 return TCG_TARGET_HAS_extr_i64_i32; 2330 case INDEX_op_bswap16_i64: 2331 return 
TCG_TARGET_HAS_bswap16_i64; 2332 case INDEX_op_bswap32_i64: 2333 return TCG_TARGET_HAS_bswap32_i64; 2334 case INDEX_op_bswap64_i64: 2335 return TCG_TARGET_HAS_bswap64_i64; 2336 case INDEX_op_clz_i64: 2337 return TCG_TARGET_HAS_clz_i64; 2338 case INDEX_op_ctz_i64: 2339 return TCG_TARGET_HAS_ctz_i64; 2340 case INDEX_op_ctpop_i64: 2341 return TCG_TARGET_HAS_ctpop_i64; 2342 case INDEX_op_add2_i64: 2343 return TCG_TARGET_HAS_add2_i64; 2344 case INDEX_op_sub2_i64: 2345 return TCG_TARGET_HAS_sub2_i64; 2346 case INDEX_op_mulu2_i64: 2347 return TCG_TARGET_HAS_mulu2_i64; 2348 case INDEX_op_muls2_i64: 2349 return TCG_TARGET_HAS_muls2_i64; 2350 2351 case INDEX_op_mov_vec: 2352 case INDEX_op_dup_vec: 2353 case INDEX_op_dupm_vec: 2354 case INDEX_op_ld_vec: 2355 case INDEX_op_st_vec: 2356 case INDEX_op_add_vec: 2357 case INDEX_op_sub_vec: 2358 case INDEX_op_and_vec: 2359 case INDEX_op_or_vec: 2360 case INDEX_op_xor_vec: 2361 case INDEX_op_cmp_vec: 2362 return has_type; 2363 case INDEX_op_dup2_vec: 2364 return has_type && TCG_TARGET_REG_BITS == 32; 2365 case INDEX_op_not_vec: 2366 return has_type && TCG_TARGET_HAS_not_vec; 2367 case INDEX_op_neg_vec: 2368 return has_type && TCG_TARGET_HAS_neg_vec; 2369 case INDEX_op_abs_vec: 2370 return has_type && TCG_TARGET_HAS_abs_vec; 2371 case INDEX_op_andc_vec: 2372 return has_type && TCG_TARGET_HAS_andc_vec; 2373 case INDEX_op_orc_vec: 2374 return has_type && TCG_TARGET_HAS_orc_vec; 2375 case INDEX_op_nand_vec: 2376 return has_type && TCG_TARGET_HAS_nand_vec; 2377 case INDEX_op_nor_vec: 2378 return has_type && TCG_TARGET_HAS_nor_vec; 2379 case INDEX_op_eqv_vec: 2380 return has_type && TCG_TARGET_HAS_eqv_vec; 2381 case INDEX_op_mul_vec: 2382 return has_type && TCG_TARGET_HAS_mul_vec; 2383 case INDEX_op_shli_vec: 2384 case INDEX_op_shri_vec: 2385 case INDEX_op_sari_vec: 2386 return has_type && TCG_TARGET_HAS_shi_vec; 2387 case INDEX_op_shls_vec: 2388 case INDEX_op_shrs_vec: 2389 case INDEX_op_sars_vec: 2390 return has_type && TCG_TARGET_HAS_shs_vec; 2391 case INDEX_op_shlv_vec: 2392 case INDEX_op_shrv_vec: 2393 case INDEX_op_sarv_vec: 2394 return has_type && TCG_TARGET_HAS_shv_vec; 2395 case INDEX_op_rotli_vec: 2396 return has_type && TCG_TARGET_HAS_roti_vec; 2397 case INDEX_op_rotls_vec: 2398 return has_type && TCG_TARGET_HAS_rots_vec; 2399 case INDEX_op_rotlv_vec: 2400 case INDEX_op_rotrv_vec: 2401 return has_type && TCG_TARGET_HAS_rotv_vec; 2402 case INDEX_op_ssadd_vec: 2403 case INDEX_op_usadd_vec: 2404 case INDEX_op_sssub_vec: 2405 case INDEX_op_ussub_vec: 2406 return has_type && TCG_TARGET_HAS_sat_vec; 2407 case INDEX_op_smin_vec: 2408 case INDEX_op_umin_vec: 2409 case INDEX_op_smax_vec: 2410 case INDEX_op_umax_vec: 2411 return has_type && TCG_TARGET_HAS_minmax_vec; 2412 case INDEX_op_bitsel_vec: 2413 return has_type && TCG_TARGET_HAS_bitsel_vec; 2414 case INDEX_op_cmpsel_vec: 2415 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2416 2417 default: 2418 if (op < INDEX_op_last_generic) { 2419 const TCGOutOp *outop; 2420 TCGConstraintSetIndex con_set; 2421 2422 if (!has_type) { 2423 return false; 2424 } 2425 2426 outop = all_outop[op]; 2427 tcg_debug_assert(outop != NULL); 2428 2429 con_set = outop->static_constraint; 2430 if (con_set == C_Dynamic) { 2431 con_set = outop->dynamic_constraint(type, flags); 2432 } 2433 if (con_set >= 0) { 2434 return true; 2435 } 2436 tcg_debug_assert(con_set == C_NotImplemented); 2437 return false; 2438 } 2439 tcg_debug_assert(op < NB_OPS); 2440 return true; 2441 2442 case INDEX_op_last_generic: 2443 g_assert_not_reached(); 2444 } 
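/* Every case above either returns or asserts; not reached. */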
2445 } 2446 2447 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2448 { 2449 unsigned width; 2450 2451 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2452 width = (type == TCG_TYPE_I32 ? 32 : 64); 2453 2454 tcg_debug_assert(ofs < width); 2455 tcg_debug_assert(len > 0); 2456 tcg_debug_assert(len <= width - ofs); 2457 2458 return TCG_TARGET_deposit_valid(type, ofs, len); 2459 } 2460 2461 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2462 2463 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2464 TCGTemp *ret, TCGTemp **args) 2465 { 2466 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2467 int n_extend = 0; 2468 TCGOp *op; 2469 int i, n, pi = 0, total_args; 2470 2471 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2472 init_call_layout(info); 2473 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2474 } 2475 2476 total_args = info->nr_out + info->nr_in + 2; 2477 op = tcg_op_alloc(INDEX_op_call, total_args); 2478 2479 #ifdef CONFIG_PLUGIN 2480 /* Flag helpers that may affect guest state */ 2481 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2482 tcg_ctx->plugin_insn->calls_helpers = true; 2483 } 2484 #endif 2485 2486 TCGOP_CALLO(op) = n = info->nr_out; 2487 switch (n) { 2488 case 0: 2489 tcg_debug_assert(ret == NULL); 2490 break; 2491 case 1: 2492 tcg_debug_assert(ret != NULL); 2493 op->args[pi++] = temp_arg(ret); 2494 break; 2495 case 2: 2496 case 4: 2497 tcg_debug_assert(ret != NULL); 2498 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2499 tcg_debug_assert(ret->temp_subindex == 0); 2500 for (i = 0; i < n; ++i) { 2501 op->args[pi++] = temp_arg(ret + i); 2502 } 2503 break; 2504 default: 2505 g_assert_not_reached(); 2506 } 2507 2508 TCGOP_CALLI(op) = n = info->nr_in; 2509 for (i = 0; i < n; i++) { 2510 const TCGCallArgumentLoc *loc = &info->in[i]; 2511 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2512 2513 switch (loc->kind) { 2514 case TCG_CALL_ARG_NORMAL: 2515 case TCG_CALL_ARG_BY_REF: 2516 case TCG_CALL_ARG_BY_REF_N: 2517 op->args[pi++] = temp_arg(ts); 2518 break; 2519 2520 case TCG_CALL_ARG_EXTEND_U: 2521 case TCG_CALL_ARG_EXTEND_S: 2522 { 2523 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2524 TCGv_i32 orig = temp_tcgv_i32(ts); 2525 2526 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2527 tcg_gen_ext_i32_i64(temp, orig); 2528 } else { 2529 tcg_gen_extu_i32_i64(temp, orig); 2530 } 2531 op->args[pi++] = tcgv_i64_arg(temp); 2532 extend_free[n_extend++] = temp; 2533 } 2534 break; 2535 2536 default: 2537 g_assert_not_reached(); 2538 } 2539 } 2540 op->args[pi++] = (uintptr_t)func; 2541 op->args[pi++] = (uintptr_t)info; 2542 tcg_debug_assert(pi == total_args); 2543 2544 if (tcg_ctx->emit_before_op) { 2545 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2546 } else { 2547 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2548 } 2549 2550 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2551 for (i = 0; i < n_extend; ++i) { 2552 tcg_temp_free_i64(extend_free[i]); 2553 } 2554 } 2555 2556 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2557 { 2558 tcg_gen_callN(func, info, ret, NULL); 2559 } 2560 2561 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2562 { 2563 tcg_gen_callN(func, info, ret, &t1); 2564 } 2565 2566 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2567 TCGTemp *t1, TCGTemp *t2) 2568 { 2569 TCGTemp *args[2] = { t1, t2 }; 2570 tcg_gen_callN(func, info, ret, args); 2571 } 2572 2573 void 
tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2574 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2575 { 2576 TCGTemp *args[3] = { t1, t2, t3 }; 2577 tcg_gen_callN(func, info, ret, args); 2578 } 2579 2580 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, 2581 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2582 { 2583 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2584 tcg_gen_callN(func, info, ret, args); 2585 } 2586 2587 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2588 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2589 { 2590 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2591 tcg_gen_callN(func, info, ret, args); 2592 } 2593 2594 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2595 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2596 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2597 { 2598 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2599 tcg_gen_callN(func, info, ret, args); 2600 } 2601 2602 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2603 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2604 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2605 { 2606 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2607 tcg_gen_callN(func, info, ret, args); 2608 } 2609 2610 static void tcg_reg_alloc_start(TCGContext *s) 2611 { 2612 int i, n; 2613 2614 for (i = 0, n = s->nb_temps; i < n; i++) { 2615 TCGTemp *ts = &s->temps[i]; 2616 TCGTempVal val = TEMP_VAL_MEM; 2617 2618 switch (ts->kind) { 2619 case TEMP_CONST: 2620 val = TEMP_VAL_CONST; 2621 break; 2622 case TEMP_FIXED: 2623 val = TEMP_VAL_REG; 2624 break; 2625 case TEMP_GLOBAL: 2626 break; 2627 case TEMP_EBB: 2628 val = TEMP_VAL_DEAD; 2629 /* fall through */ 2630 case TEMP_TB: 2631 ts->mem_allocated = 0; 2632 break; 2633 default: 2634 g_assert_not_reached(); 2635 } 2636 ts->val_type = val; 2637 } 2638 2639 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2640 } 2641 2642 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2643 TCGTemp *ts) 2644 { 2645 int idx = temp_idx(ts); 2646 2647 switch (ts->kind) { 2648 case TEMP_FIXED: 2649 case TEMP_GLOBAL: 2650 pstrcpy(buf, buf_size, ts->name); 2651 break; 2652 case TEMP_TB: 2653 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2654 break; 2655 case TEMP_EBB: 2656 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2657 break; 2658 case TEMP_CONST: 2659 switch (ts->type) { 2660 case TCG_TYPE_I32: 2661 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2662 break; 2663 #if TCG_TARGET_REG_BITS > 32 2664 case TCG_TYPE_I64: 2665 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2666 break; 2667 #endif 2668 case TCG_TYPE_V64: 2669 case TCG_TYPE_V128: 2670 case TCG_TYPE_V256: 2671 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2672 64 << (ts->type - TCG_TYPE_V64), ts->val); 2673 break; 2674 default: 2675 g_assert_not_reached(); 2676 } 2677 break; 2678 } 2679 return buf; 2680 } 2681 2682 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2683 int buf_size, TCGArg arg) 2684 { 2685 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2686 } 2687 2688 static const char * const cond_name[] = 2689 { 2690 [TCG_COND_NEVER] = "never", 2691 [TCG_COND_ALWAYS] = "always", 2692 [TCG_COND_EQ] = "eq", 2693 [TCG_COND_NE] = "ne", 2694 [TCG_COND_LT] = "lt", 2695 [TCG_COND_GE] = "ge", 2696 [TCG_COND_LE] = "le", 2697 [TCG_COND_GT] = "gt", 2698 [TCG_COND_LTU] = "ltu", 2699 [TCG_COND_GEU] = "geu", 2700 [TCG_COND_LEU] = "leu", 2701 [TCG_COND_GTU] = "gtu", 2702 [TCG_COND_TSTEQ] = "tsteq", 2703 [TCG_COND_TSTNE] = 
"tstne", 2704 }; 2705 2706 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2707 { 2708 [MO_UB] = "ub", 2709 [MO_SB] = "sb", 2710 [MO_LEUW] = "leuw", 2711 [MO_LESW] = "lesw", 2712 [MO_LEUL] = "leul", 2713 [MO_LESL] = "lesl", 2714 [MO_LEUQ] = "leq", 2715 [MO_BEUW] = "beuw", 2716 [MO_BESW] = "besw", 2717 [MO_BEUL] = "beul", 2718 [MO_BESL] = "besl", 2719 [MO_BEUQ] = "beq", 2720 [MO_128 + MO_BE] = "beo", 2721 [MO_128 + MO_LE] = "leo", 2722 }; 2723 2724 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2725 [MO_UNALN >> MO_ASHIFT] = "un+", 2726 [MO_ALIGN >> MO_ASHIFT] = "al+", 2727 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2728 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2729 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2730 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2731 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2732 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2733 }; 2734 2735 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2736 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2737 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2738 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2739 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2740 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2741 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2742 }; 2743 2744 static const char bswap_flag_name[][6] = { 2745 [TCG_BSWAP_IZ] = "iz", 2746 [TCG_BSWAP_OZ] = "oz", 2747 [TCG_BSWAP_OS] = "os", 2748 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2749 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2750 }; 2751 2752 #ifdef CONFIG_PLUGIN 2753 static const char * const plugin_from_name[] = { 2754 "from-tb", 2755 "from-insn", 2756 "after-insn", 2757 "after-tb", 2758 }; 2759 #endif 2760 2761 static inline bool tcg_regset_single(TCGRegSet d) 2762 { 2763 return (d & (d - 1)) == 0; 2764 } 2765 2766 static inline TCGReg tcg_regset_first(TCGRegSet d) 2767 { 2768 if (TCG_TARGET_NB_REGS <= 32) { 2769 return ctz32(d); 2770 } else { 2771 return ctz64(d); 2772 } 2773 } 2774 2775 /* Return only the number of characters output -- no error return. */ 2776 #define ne_fprintf(...) \ 2777 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2778 2779 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2780 { 2781 char buf[128]; 2782 TCGOp *op; 2783 2784 QTAILQ_FOREACH(op, &s->ops, link) { 2785 int i, k, nb_oargs, nb_iargs, nb_cargs; 2786 const TCGOpDef *def; 2787 TCGOpcode c; 2788 int col = 0; 2789 2790 c = op->opc; 2791 def = &tcg_op_defs[c]; 2792 2793 if (c == INDEX_op_insn_start) { 2794 nb_oargs = 0; 2795 col += ne_fprintf(f, "\n ----"); 2796 2797 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2798 col += ne_fprintf(f, " %016" PRIx64, 2799 tcg_get_insn_start_param(op, i)); 2800 } 2801 } else if (c == INDEX_op_call) { 2802 const TCGHelperInfo *info = tcg_call_info(op); 2803 void *func = tcg_call_func(op); 2804 2805 /* variable number of arguments */ 2806 nb_oargs = TCGOP_CALLO(op); 2807 nb_iargs = TCGOP_CALLI(op); 2808 nb_cargs = def->nb_cargs; 2809 2810 col += ne_fprintf(f, " %s ", def->name); 2811 2812 /* 2813 * Print the function name from TCGHelperInfo, if available. 2814 * Note that plugins have a template function for the info, 2815 * but the actual function pointer comes from the plugin. 
2816 */ 2817 if (func == info->func) { 2818 col += ne_fprintf(f, "%s", info->name); 2819 } else { 2820 col += ne_fprintf(f, "plugin(%p)", func); 2821 } 2822 2823 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2824 for (i = 0; i < nb_oargs; i++) { 2825 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2826 op->args[i])); 2827 } 2828 for (i = 0; i < nb_iargs; i++) { 2829 TCGArg arg = op->args[nb_oargs + i]; 2830 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2831 col += ne_fprintf(f, ",%s", t); 2832 } 2833 } else { 2834 if (def->flags & TCG_OPF_INT) { 2835 col += ne_fprintf(f, " %s_i%d ", 2836 def->name, 2837 8 * tcg_type_size(TCGOP_TYPE(op))); 2838 } else if (def->flags & TCG_OPF_VECTOR) { 2839 col += ne_fprintf(f, "%s v%d,e%d,", 2840 def->name, 2841 8 * tcg_type_size(TCGOP_TYPE(op)), 2842 8 << TCGOP_VECE(op)); 2843 } else { 2844 col += ne_fprintf(f, " %s ", def->name); 2845 } 2846 2847 nb_oargs = def->nb_oargs; 2848 nb_iargs = def->nb_iargs; 2849 nb_cargs = def->nb_cargs; 2850 2851 k = 0; 2852 for (i = 0; i < nb_oargs; i++) { 2853 const char *sep = k ? "," : ""; 2854 col += ne_fprintf(f, "%s%s", sep, 2855 tcg_get_arg_str(s, buf, sizeof(buf), 2856 op->args[k++])); 2857 } 2858 for (i = 0; i < nb_iargs; i++) { 2859 const char *sep = k ? "," : ""; 2860 col += ne_fprintf(f, "%s%s", sep, 2861 tcg_get_arg_str(s, buf, sizeof(buf), 2862 op->args[k++])); 2863 } 2864 switch (c) { 2865 case INDEX_op_brcond_i32: 2866 case INDEX_op_setcond_i32: 2867 case INDEX_op_negsetcond_i32: 2868 case INDEX_op_movcond_i32: 2869 case INDEX_op_brcond2_i32: 2870 case INDEX_op_setcond2_i32: 2871 case INDEX_op_brcond_i64: 2872 case INDEX_op_setcond_i64: 2873 case INDEX_op_negsetcond_i64: 2874 case INDEX_op_movcond_i64: 2875 case INDEX_op_cmp_vec: 2876 case INDEX_op_cmpsel_vec: 2877 if (op->args[k] < ARRAY_SIZE(cond_name) 2878 && cond_name[op->args[k]]) { 2879 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2880 } else { 2881 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2882 } 2883 i = 1; 2884 break; 2885 case INDEX_op_qemu_ld_i32: 2886 case INDEX_op_qemu_st_i32: 2887 case INDEX_op_qemu_st8_i32: 2888 case INDEX_op_qemu_ld_i64: 2889 case INDEX_op_qemu_st_i64: 2890 case INDEX_op_qemu_ld_i128: 2891 case INDEX_op_qemu_st_i128: 2892 { 2893 const char *s_al, *s_op, *s_at; 2894 MemOpIdx oi = op->args[k++]; 2895 MemOp mop = get_memop(oi); 2896 unsigned ix = get_mmuidx(oi); 2897 2898 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2899 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2900 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2901 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2902 2903 /* If all fields are accounted for, print symbolically. 
*/ 2904 if (!mop && s_al && s_op && s_at) { 2905 col += ne_fprintf(f, ",%s%s%s,%u", 2906 s_at, s_al, s_op, ix); 2907 } else { 2908 mop = get_memop(oi); 2909 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2910 } 2911 i = 1; 2912 } 2913 break; 2914 case INDEX_op_bswap16_i32: 2915 case INDEX_op_bswap16_i64: 2916 case INDEX_op_bswap32_i32: 2917 case INDEX_op_bswap32_i64: 2918 case INDEX_op_bswap64_i64: 2919 { 2920 TCGArg flags = op->args[k]; 2921 const char *name = NULL; 2922 2923 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2924 name = bswap_flag_name[flags]; 2925 } 2926 if (name) { 2927 col += ne_fprintf(f, ",%s", name); 2928 } else { 2929 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2930 } 2931 i = k = 1; 2932 } 2933 break; 2934 #ifdef CONFIG_PLUGIN 2935 case INDEX_op_plugin_cb: 2936 { 2937 TCGArg from = op->args[k++]; 2938 const char *name = NULL; 2939 2940 if (from < ARRAY_SIZE(plugin_from_name)) { 2941 name = plugin_from_name[from]; 2942 } 2943 if (name) { 2944 col += ne_fprintf(f, "%s", name); 2945 } else { 2946 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 2947 } 2948 i = 1; 2949 } 2950 break; 2951 #endif 2952 default: 2953 i = 0; 2954 break; 2955 } 2956 switch (c) { 2957 case INDEX_op_set_label: 2958 case INDEX_op_br: 2959 case INDEX_op_brcond_i32: 2960 case INDEX_op_brcond_i64: 2961 case INDEX_op_brcond2_i32: 2962 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2963 arg_label(op->args[k])->id); 2964 i++, k++; 2965 break; 2966 case INDEX_op_mb: 2967 { 2968 TCGBar membar = op->args[k]; 2969 const char *b_op, *m_op; 2970 2971 switch (membar & TCG_BAR_SC) { 2972 case 0: 2973 b_op = "none"; 2974 break; 2975 case TCG_BAR_LDAQ: 2976 b_op = "acq"; 2977 break; 2978 case TCG_BAR_STRL: 2979 b_op = "rel"; 2980 break; 2981 case TCG_BAR_SC: 2982 b_op = "seq"; 2983 break; 2984 default: 2985 g_assert_not_reached(); 2986 } 2987 2988 switch (membar & TCG_MO_ALL) { 2989 case 0: 2990 m_op = "none"; 2991 break; 2992 case TCG_MO_LD_LD: 2993 m_op = "rr"; 2994 break; 2995 case TCG_MO_LD_ST: 2996 m_op = "rw"; 2997 break; 2998 case TCG_MO_ST_LD: 2999 m_op = "wr"; 3000 break; 3001 case TCG_MO_ST_ST: 3002 m_op = "ww"; 3003 break; 3004 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3005 m_op = "rr+rw"; 3006 break; 3007 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3008 m_op = "rr+wr"; 3009 break; 3010 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3011 m_op = "rr+ww"; 3012 break; 3013 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3014 m_op = "rw+wr"; 3015 break; 3016 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3017 m_op = "rw+ww"; 3018 break; 3019 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3020 m_op = "wr+ww"; 3021 break; 3022 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3023 m_op = "rr+rw+wr"; 3024 break; 3025 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3026 m_op = "rr+rw+ww"; 3027 break; 3028 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3029 m_op = "rr+wr+ww"; 3030 break; 3031 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3032 m_op = "rw+wr+ww"; 3033 break; 3034 case TCG_MO_ALL: 3035 m_op = "all"; 3036 break; 3037 default: 3038 g_assert_not_reached(); 3039 } 3040 3041 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3042 i++, k++; 3043 } 3044 break; 3045 default: 3046 break; 3047 } 3048 for (; i < nb_cargs; i++, k++) { 3049 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3050 op->args[k]); 3051 } 3052 } 3053 3054 if (have_prefs || op->life) { 3055 for (; col < 40; ++col) { 3056 putc(' ', f); 3057 } 3058 } 3059 3060 if (op->life) { 3061 unsigned life = op->life; 3062 3063 if (life & (SYNC_ARG * 3)) { 3064 ne_fprintf(f, " sync:"); 3065 for (i = 0; i < 2; ++i) { 3066 if (life & (SYNC_ARG << i)) { 3067 ne_fprintf(f, " %d", i); 3068 } 3069 } 3070 } 3071 life /= DEAD_ARG; 3072 if (life) { 3073 ne_fprintf(f, " dead:"); 3074 for (i = 0; life; ++i, life >>= 1) { 3075 if (life & 1) { 3076 ne_fprintf(f, " %d", i); 3077 } 3078 } 3079 } 3080 } 3081 3082 if (have_prefs) { 3083 for (i = 0; i < nb_oargs; ++i) { 3084 TCGRegSet set = output_pref(op, i); 3085 3086 if (i == 0) { 3087 ne_fprintf(f, " pref="); 3088 } else { 3089 ne_fprintf(f, ","); 3090 } 3091 if (set == 0) { 3092 ne_fprintf(f, "none"); 3093 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3094 ne_fprintf(f, "all"); 3095 #ifdef CONFIG_DEBUG_TCG 3096 } else if (tcg_regset_single(set)) { 3097 TCGReg reg = tcg_regset_first(set); 3098 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3099 #endif 3100 } else if (TCG_TARGET_NB_REGS <= 32) { 3101 ne_fprintf(f, "0x%x", (uint32_t)set); 3102 } else { 3103 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3104 } 3105 } 3106 } 3107 3108 putc('\n', f); 3109 } 3110 } 3111 3112 /* we give more priority to constraints with less registers */ 3113 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3114 { 3115 int n; 3116 3117 arg_ct += k; 3118 n = ctpop64(arg_ct->regs); 3119 3120 /* 3121 * Sort constraints of a single register first, which includes output 3122 * aliases (which must exactly match the input already allocated). 3123 */ 3124 if (n == 1 || arg_ct->oalias) { 3125 return INT_MAX; 3126 } 3127 3128 /* 3129 * Sort register pairs next, first then second immediately after. 3130 * Arbitrarily sort multiple pairs by the index of the first reg; 3131 * there shouldn't be many pairs. 3132 */ 3133 switch (arg_ct->pair) { 3134 case 1: 3135 case 3: 3136 return (k + 1) * 2; 3137 case 2: 3138 return (arg_ct->pair_index + 1) * 2 - 1; 3139 } 3140 3141 /* Finally, sort by decreasing register count. 
*/ 3142 assert(n > 1); 3143 return -n; 3144 } 3145 3146 /* sort from highest priority to lowest */ 3147 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3148 { 3149 int i, j; 3150 3151 for (i = 0; i < n; i++) { 3152 a[start + i].sort_index = start + i; 3153 } 3154 if (n <= 1) { 3155 return; 3156 } 3157 for (i = 0; i < n - 1; i++) { 3158 for (j = i + 1; j < n; j++) { 3159 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3160 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3161 if (p1 < p2) { 3162 int tmp = a[start + i].sort_index; 3163 a[start + i].sort_index = a[start + j].sort_index; 3164 a[start + j].sort_index = tmp; 3165 } 3166 } 3167 } 3168 } 3169 3170 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3171 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3172 3173 static void process_constraint_sets(void) 3174 { 3175 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3176 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3177 TCGArgConstraint *args_ct = all_cts[c]; 3178 int nb_oargs = tdefs->nb_oargs; 3179 int nb_iargs = tdefs->nb_iargs; 3180 int nb_args = nb_oargs + nb_iargs; 3181 bool saw_alias_pair = false; 3182 3183 for (int i = 0; i < nb_args; i++) { 3184 const char *ct_str = tdefs->args_ct_str[i]; 3185 bool input_p = i >= nb_oargs; 3186 int o; 3187 3188 switch (*ct_str) { 3189 case '0' ... '9': 3190 o = *ct_str - '0'; 3191 tcg_debug_assert(input_p); 3192 tcg_debug_assert(o < nb_oargs); 3193 tcg_debug_assert(args_ct[o].regs != 0); 3194 tcg_debug_assert(!args_ct[o].oalias); 3195 args_ct[i] = args_ct[o]; 3196 /* The output sets oalias. */ 3197 args_ct[o].oalias = 1; 3198 args_ct[o].alias_index = i; 3199 /* The input sets ialias. */ 3200 args_ct[i].ialias = 1; 3201 args_ct[i].alias_index = o; 3202 if (args_ct[i].pair) { 3203 saw_alias_pair = true; 3204 } 3205 tcg_debug_assert(ct_str[1] == '\0'); 3206 continue; 3207 3208 case '&': 3209 tcg_debug_assert(!input_p); 3210 args_ct[i].newreg = true; 3211 ct_str++; 3212 break; 3213 3214 case 'p': /* plus */ 3215 /* Allocate to the register after the previous. */ 3216 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3217 o = i - 1; 3218 tcg_debug_assert(!args_ct[o].pair); 3219 tcg_debug_assert(!args_ct[o].ct); 3220 args_ct[i] = (TCGArgConstraint){ 3221 .pair = 2, 3222 .pair_index = o, 3223 .regs = args_ct[o].regs << 1, 3224 .newreg = args_ct[o].newreg, 3225 }; 3226 args_ct[o].pair = 1; 3227 args_ct[o].pair_index = i; 3228 tcg_debug_assert(ct_str[1] == '\0'); 3229 continue; 3230 3231 case 'm': /* minus */ 3232 /* Allocate to the register before the previous. */ 3233 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3234 o = i - 1; 3235 tcg_debug_assert(!args_ct[o].pair); 3236 tcg_debug_assert(!args_ct[o].ct); 3237 args_ct[i] = (TCGArgConstraint){ 3238 .pair = 1, 3239 .pair_index = o, 3240 .regs = args_ct[o].regs >> 1, 3241 .newreg = args_ct[o].newreg, 3242 }; 3243 args_ct[o].pair = 2; 3244 args_ct[o].pair_index = i; 3245 tcg_debug_assert(ct_str[1] == '\0'); 3246 continue; 3247 } 3248 3249 do { 3250 switch (*ct_str) { 3251 case 'i': 3252 args_ct[i].ct |= TCG_CT_CONST; 3253 break; 3254 #ifdef TCG_REG_ZERO 3255 case 'z': 3256 args_ct[i].ct |= TCG_CT_REG_ZERO; 3257 break; 3258 #endif 3259 3260 /* Include all of the target-specific constraints. 
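* Each constraint letter defined by tcg-target-con-str.h expands here
* to either a constant-argument flag (CONST) or a register-set mask
* (REGS) for this argument.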
*/ 3261 3262 #undef CONST 3263 #define CONST(CASE, MASK) \ 3264 case CASE: args_ct[i].ct |= MASK; break; 3265 #define REGS(CASE, MASK) \ 3266 case CASE: args_ct[i].regs |= MASK; break; 3267 3268 #include "tcg-target-con-str.h" 3269 3270 #undef REGS 3271 #undef CONST 3272 default: 3273 case '0' ... '9': 3274 case '&': 3275 case 'p': 3276 case 'm': 3277 /* Typo in TCGConstraintSet constraint. */ 3278 g_assert_not_reached(); 3279 } 3280 } while (*++ct_str != '\0'); 3281 } 3282 3283 /* 3284 * Fix up output pairs that are aliased with inputs. 3285 * When we created the alias, we copied pair from the output. 3286 * There are three cases: 3287 * (1a) Pairs of inputs alias pairs of outputs. 3288 * (1b) One input aliases the first of a pair of outputs. 3289 * (2) One input aliases the second of a pair of outputs. 3290 * 3291 * Case 1a is handled by making sure that the pair_index'es are 3292 * properly updated so that they appear the same as a pair of inputs. 3293 * 3294 * Case 1b is handled by setting the pair_index of the input to 3295 * itself, simply so it doesn't point to an unrelated argument. 3296 * Since we don't encounter the "second" during the input allocation 3297 * phase, nothing happens with the second half of the input pair. 3298 * 3299 * Case 2 is handled by setting the second input to pair=3, the 3300 * first output to pair=3, and the pair_index'es to match. 3301 */ 3302 if (saw_alias_pair) { 3303 for (int i = nb_oargs; i < nb_args; i++) { 3304 int o, o2, i2; 3305 3306 /* 3307 * Since [0-9pm] must be alone in the constraint string, 3308 * the only way they can both be set is if the pair comes 3309 * from the output alias. 3310 */ 3311 if (!args_ct[i].ialias) { 3312 continue; 3313 } 3314 switch (args_ct[i].pair) { 3315 case 0: 3316 break; 3317 case 1: 3318 o = args_ct[i].alias_index; 3319 o2 = args_ct[o].pair_index; 3320 tcg_debug_assert(args_ct[o].pair == 1); 3321 tcg_debug_assert(args_ct[o2].pair == 2); 3322 if (args_ct[o2].oalias) { 3323 /* Case 1a */ 3324 i2 = args_ct[o2].alias_index; 3325 tcg_debug_assert(args_ct[i2].pair == 2); 3326 args_ct[i2].pair_index = i; 3327 args_ct[i].pair_index = i2; 3328 } else { 3329 /* Case 1b */ 3330 args_ct[i].pair_index = i; 3331 } 3332 break; 3333 case 2: 3334 o = args_ct[i].alias_index; 3335 o2 = args_ct[o].pair_index; 3336 tcg_debug_assert(args_ct[o].pair == 2); 3337 tcg_debug_assert(args_ct[o2].pair == 1); 3338 if (args_ct[o2].oalias) { 3339 /* Case 1a */ 3340 i2 = args_ct[o2].alias_index; 3341 tcg_debug_assert(args_ct[i2].pair == 1); 3342 args_ct[i2].pair_index = i; 3343 args_ct[i].pair_index = i2; 3344 } else { 3345 /* Case 2 */ 3346 args_ct[i].pair = 3; 3347 args_ct[o2].pair = 3; 3348 args_ct[i].pair_index = o2; 3349 args_ct[o2].pair_index = i; 3350 } 3351 break; 3352 default: 3353 g_assert_not_reached(); 3354 } 3355 } 3356 } 3357 3358 /* sort the constraints (XXX: this is just an heuristic) */ 3359 sort_constraints(args_ct, 0, nb_oargs); 3360 sort_constraints(args_ct, nb_oargs, nb_iargs); 3361 } 3362 } 3363 3364 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3365 { 3366 TCGOpcode opc = op->opc; 3367 TCGType type = TCGOP_TYPE(op); 3368 unsigned flags = TCGOP_FLAGS(op); 3369 const TCGOpDef *def = &tcg_op_defs[opc]; 3370 const TCGOutOp *outop = all_outop[opc]; 3371 TCGConstraintSetIndex con_set; 3372 3373 if (def->flags & TCG_OPF_NOT_PRESENT) { 3374 return empty_cts; 3375 } 3376 3377 if (outop) { 3378 con_set = outop->static_constraint; 3379 if (con_set == C_Dynamic) { 3380 con_set = outop->dynamic_constraint(type, flags); 
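/* The dynamic result is validated by the asserts below. */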
3381 } 3382 } else { 3383 con_set = tcg_target_op_def(opc, type, flags); 3384 } 3385 tcg_debug_assert(con_set >= 0); 3386 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3387 3388 /* The constraint arguments must match TCGOpcode arguments. */ 3389 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3390 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3391 3392 return all_cts[con_set]; 3393 } 3394 3395 static void remove_label_use(TCGOp *op, int idx) 3396 { 3397 TCGLabel *label = arg_label(op->args[idx]); 3398 TCGLabelUse *use; 3399 3400 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3401 if (use->op == op) { 3402 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3403 return; 3404 } 3405 } 3406 g_assert_not_reached(); 3407 } 3408 3409 void tcg_op_remove(TCGContext *s, TCGOp *op) 3410 { 3411 switch (op->opc) { 3412 case INDEX_op_br: 3413 remove_label_use(op, 0); 3414 break; 3415 case INDEX_op_brcond_i32: 3416 case INDEX_op_brcond_i64: 3417 remove_label_use(op, 3); 3418 break; 3419 case INDEX_op_brcond2_i32: 3420 remove_label_use(op, 5); 3421 break; 3422 default: 3423 break; 3424 } 3425 3426 QTAILQ_REMOVE(&s->ops, op, link); 3427 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3428 s->nb_ops--; 3429 } 3430 3431 void tcg_remove_ops_after(TCGOp *op) 3432 { 3433 TCGContext *s = tcg_ctx; 3434 3435 while (true) { 3436 TCGOp *last = tcg_last_op(); 3437 if (last == op) { 3438 return; 3439 } 3440 tcg_op_remove(s, last); 3441 } 3442 } 3443 3444 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3445 { 3446 TCGContext *s = tcg_ctx; 3447 TCGOp *op = NULL; 3448 3449 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3450 QTAILQ_FOREACH(op, &s->free_ops, link) { 3451 if (nargs <= op->nargs) { 3452 QTAILQ_REMOVE(&s->free_ops, op, link); 3453 nargs = op->nargs; 3454 goto found; 3455 } 3456 } 3457 } 3458 3459 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3460 nargs = MAX(4, nargs); 3461 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3462 3463 found: 3464 memset(op, 0, offsetof(TCGOp, link)); 3465 op->opc = opc; 3466 op->nargs = nargs; 3467 3468 /* Check for bitfield overflow. 
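* TCGOp.nargs is a narrow bitfield, so verify that the value assigned
* above was not truncated.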
*/ 3469 tcg_debug_assert(op->nargs == nargs); 3470 3471 s->nb_ops++; 3472 return op; 3473 } 3474 3475 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3476 { 3477 TCGOp *op = tcg_op_alloc(opc, nargs); 3478 3479 if (tcg_ctx->emit_before_op) { 3480 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3481 } else { 3482 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3483 } 3484 return op; 3485 } 3486 3487 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3488 TCGOpcode opc, TCGType type, unsigned nargs) 3489 { 3490 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3491 3492 TCGOP_TYPE(new_op) = type; 3493 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3494 return new_op; 3495 } 3496 3497 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3498 TCGOpcode opc, TCGType type, unsigned nargs) 3499 { 3500 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3501 3502 TCGOP_TYPE(new_op) = type; 3503 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3504 return new_op; 3505 } 3506 3507 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3508 { 3509 TCGLabelUse *u; 3510 3511 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3512 TCGOp *op = u->op; 3513 switch (op->opc) { 3514 case INDEX_op_br: 3515 op->args[0] = label_arg(to); 3516 break; 3517 case INDEX_op_brcond_i32: 3518 case INDEX_op_brcond_i64: 3519 op->args[3] = label_arg(to); 3520 break; 3521 case INDEX_op_brcond2_i32: 3522 op->args[5] = label_arg(to); 3523 break; 3524 default: 3525 g_assert_not_reached(); 3526 } 3527 } 3528 3529 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3530 } 3531 3532 /* Reachable analysis : remove unreachable code. */ 3533 static void __attribute__((noinline)) 3534 reachable_code_pass(TCGContext *s) 3535 { 3536 TCGOp *op, *op_next, *op_prev; 3537 bool dead = false; 3538 3539 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3540 bool remove = dead; 3541 TCGLabel *label; 3542 3543 switch (op->opc) { 3544 case INDEX_op_set_label: 3545 label = arg_label(op->args[0]); 3546 3547 /* 3548 * Note that the first op in the TB is always a load, 3549 * so there is always something before a label. 3550 */ 3551 op_prev = QTAILQ_PREV(op, link); 3552 3553 /* 3554 * If we find two sequential labels, move all branches to 3555 * reference the second label and remove the first label. 3556 * Do this before branch to next optimization, so that the 3557 * middle label is out of the way. 3558 */ 3559 if (op_prev->opc == INDEX_op_set_label) { 3560 move_label_uses(label, arg_label(op_prev->args[0])); 3561 tcg_op_remove(s, op_prev); 3562 op_prev = QTAILQ_PREV(op, link); 3563 } 3564 3565 /* 3566 * Optimization can fold conditional branches to unconditional. 3567 * If we find a label which is preceded by an unconditional 3568 * branch to next, remove the branch. We couldn't do this when 3569 * processing the branch because any dead code between the branch 3570 * and label had not yet been removed. 3571 */ 3572 if (op_prev->opc == INDEX_op_br && 3573 label == arg_label(op_prev->args[0])) { 3574 tcg_op_remove(s, op_prev); 3575 /* Fall through means insns become live again. */ 3576 dead = false; 3577 } 3578 3579 if (QSIMPLEQ_EMPTY(&label->branches)) { 3580 /* 3581 * While there is an occasional backward branch, virtually 3582 * all branches generated by the translators are forward. 3583 * Which means that generally we will have already removed 3584 * all references to the label that will be, and there is 3585 * little to be gained by iterating. 3586 */ 3587 remove = true; 3588 } else { 3589 /* Once we see a label, insns become live again. 
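* Some branch still targets this label, so the code following it is
* reachable even though everything before it was dead.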
*/ 3590 dead = false; 3591 remove = false; 3592 } 3593 break; 3594 3595 case INDEX_op_br: 3596 case INDEX_op_exit_tb: 3597 case INDEX_op_goto_ptr: 3598 /* Unconditional branches; everything following is dead. */ 3599 dead = true; 3600 break; 3601 3602 case INDEX_op_call: 3603 /* Notice noreturn helper calls, raising exceptions. */ 3604 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3605 dead = true; 3606 } 3607 break; 3608 3609 case INDEX_op_insn_start: 3610 /* Never remove -- we need to keep these for unwind. */ 3611 remove = false; 3612 break; 3613 3614 default: 3615 break; 3616 } 3617 3618 if (remove) { 3619 tcg_op_remove(s, op); 3620 } 3621 } 3622 } 3623 3624 #define TS_DEAD 1 3625 #define TS_MEM 2 3626 3627 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3628 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3629 3630 /* For liveness_pass_1, the register preferences for a given temp. */ 3631 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3632 { 3633 return ts->state_ptr; 3634 } 3635 3636 /* For liveness_pass_1, reset the preferences for a given temp to the 3637 * maximal regset for its type. 3638 */ 3639 static inline void la_reset_pref(TCGTemp *ts) 3640 { 3641 *la_temp_pref(ts) 3642 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3643 } 3644 3645 /* liveness analysis: end of function: all temps are dead, and globals 3646 should be in memory. */ 3647 static void la_func_end(TCGContext *s, int ng, int nt) 3648 { 3649 int i; 3650 3651 for (i = 0; i < ng; ++i) { 3652 s->temps[i].state = TS_DEAD | TS_MEM; 3653 la_reset_pref(&s->temps[i]); 3654 } 3655 for (i = ng; i < nt; ++i) { 3656 s->temps[i].state = TS_DEAD; 3657 la_reset_pref(&s->temps[i]); 3658 } 3659 } 3660 3661 /* liveness analysis: end of basic block: all temps are dead, globals 3662 and local temps should be in memory. */ 3663 static void la_bb_end(TCGContext *s, int ng, int nt) 3664 { 3665 int i; 3666 3667 for (i = 0; i < nt; ++i) { 3668 TCGTemp *ts = &s->temps[i]; 3669 int state; 3670 3671 switch (ts->kind) { 3672 case TEMP_FIXED: 3673 case TEMP_GLOBAL: 3674 case TEMP_TB: 3675 state = TS_DEAD | TS_MEM; 3676 break; 3677 case TEMP_EBB: 3678 case TEMP_CONST: 3679 state = TS_DEAD; 3680 break; 3681 default: 3682 g_assert_not_reached(); 3683 } 3684 ts->state = state; 3685 la_reset_pref(ts); 3686 } 3687 } 3688 3689 /* liveness analysis: sync globals back to memory. */ 3690 static void la_global_sync(TCGContext *s, int ng) 3691 { 3692 int i; 3693 3694 for (i = 0; i < ng; ++i) { 3695 int state = s->temps[i].state; 3696 s->temps[i].state = state | TS_MEM; 3697 if (state == TS_DEAD) { 3698 /* If the global was previously dead, reset prefs. */ 3699 la_reset_pref(&s->temps[i]); 3700 } 3701 } 3702 } 3703 3704 /* 3705 * liveness analysis: conditional branch: all temps are dead unless 3706 * explicitly live-across-conditional-branch, globals and local temps 3707 * should be synced. 3708 */ 3709 static void la_bb_sync(TCGContext *s, int ng, int nt) 3710 { 3711 la_global_sync(s, ng); 3712 3713 for (int i = ng; i < nt; ++i) { 3714 TCGTemp *ts = &s->temps[i]; 3715 int state; 3716 3717 switch (ts->kind) { 3718 case TEMP_TB: 3719 state = ts->state; 3720 ts->state = state | TS_MEM; 3721 if (state != TS_DEAD) { 3722 continue; 3723 } 3724 break; 3725 case TEMP_EBB: 3726 case TEMP_CONST: 3727 continue; 3728 default: 3729 g_assert_not_reached(); 3730 } 3731 la_reset_pref(&s->temps[i]); 3732 } 3733 } 3734 3735 /* liveness analysis: sync globals back to memory and kill. 
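* Used for helper calls that may both read and update the globals:
* each global must be synced to memory and any register copy is
* invalid afterwards.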
*/ 3736 static void la_global_kill(TCGContext *s, int ng) 3737 { 3738 int i; 3739 3740 for (i = 0; i < ng; i++) { 3741 s->temps[i].state = TS_DEAD | TS_MEM; 3742 la_reset_pref(&s->temps[i]); 3743 } 3744 } 3745 3746 /* liveness analysis: note live globals crossing calls. */ 3747 static void la_cross_call(TCGContext *s, int nt) 3748 { 3749 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3750 int i; 3751 3752 for (i = 0; i < nt; i++) { 3753 TCGTemp *ts = &s->temps[i]; 3754 if (!(ts->state & TS_DEAD)) { 3755 TCGRegSet *pset = la_temp_pref(ts); 3756 TCGRegSet set = *pset; 3757 3758 set &= mask; 3759 /* If the combination is not possible, restart. */ 3760 if (set == 0) { 3761 set = tcg_target_available_regs[ts->type] & mask; 3762 } 3763 *pset = set; 3764 } 3765 } 3766 } 3767 3768 /* 3769 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3770 * to TEMP_EBB, if possible. 3771 */ 3772 static void __attribute__((noinline)) 3773 liveness_pass_0(TCGContext *s) 3774 { 3775 void * const multiple_ebb = (void *)(uintptr_t)-1; 3776 int nb_temps = s->nb_temps; 3777 TCGOp *op, *ebb; 3778 3779 for (int i = s->nb_globals; i < nb_temps; ++i) { 3780 s->temps[i].state_ptr = NULL; 3781 } 3782 3783 /* 3784 * Represent each EBB by the op at which it begins. In the case of 3785 * the first EBB, this is the first op, otherwise it is a label. 3786 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3787 * within a single EBB, else MULTIPLE_EBB. 3788 */ 3789 ebb = QTAILQ_FIRST(&s->ops); 3790 QTAILQ_FOREACH(op, &s->ops, link) { 3791 const TCGOpDef *def; 3792 int nb_oargs, nb_iargs; 3793 3794 switch (op->opc) { 3795 case INDEX_op_set_label: 3796 ebb = op; 3797 continue; 3798 case INDEX_op_discard: 3799 continue; 3800 case INDEX_op_call: 3801 nb_oargs = TCGOP_CALLO(op); 3802 nb_iargs = TCGOP_CALLI(op); 3803 break; 3804 default: 3805 def = &tcg_op_defs[op->opc]; 3806 nb_oargs = def->nb_oargs; 3807 nb_iargs = def->nb_iargs; 3808 break; 3809 } 3810 3811 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3812 TCGTemp *ts = arg_temp(op->args[i]); 3813 3814 if (ts->kind != TEMP_TB) { 3815 continue; 3816 } 3817 if (ts->state_ptr == NULL) { 3818 ts->state_ptr = ebb; 3819 } else if (ts->state_ptr != ebb) { 3820 ts->state_ptr = multiple_ebb; 3821 } 3822 } 3823 } 3824 3825 /* 3826 * For TEMP_TB that turned out not to be used beyond one EBB, 3827 * reduce the liveness to TEMP_EBB. 3828 */ 3829 for (int i = s->nb_globals; i < nb_temps; ++i) { 3830 TCGTemp *ts = &s->temps[i]; 3831 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3832 ts->kind = TEMP_EBB; 3833 } 3834 } 3835 } 3836 3837 /* Liveness analysis : update the opc_arg_life array to tell if a 3838 given input arguments is dead. Instructions updating dead 3839 temporaries are removed. */ 3840 static void __attribute__((noinline)) 3841 liveness_pass_1(TCGContext *s) 3842 { 3843 int nb_globals = s->nb_globals; 3844 int nb_temps = s->nb_temps; 3845 TCGOp *op, *op_prev; 3846 TCGRegSet *prefs; 3847 int i; 3848 3849 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3850 for (i = 0; i < nb_temps; ++i) { 3851 s->temps[i].state_ptr = prefs + i; 3852 } 3853 3854 /* ??? Should be redundant with the exit_tb that ends the TB. 
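* Keep it regardless: the backward walk below needs a well-defined
* initial state for every temp.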
*/ 3855 la_func_end(s, nb_globals, nb_temps); 3856 3857 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3858 int nb_iargs, nb_oargs; 3859 TCGOpcode opc_new, opc_new2; 3860 TCGLifeData arg_life = 0; 3861 TCGTemp *ts; 3862 TCGOpcode opc = op->opc; 3863 const TCGOpDef *def = &tcg_op_defs[opc]; 3864 const TCGArgConstraint *args_ct; 3865 3866 switch (opc) { 3867 case INDEX_op_call: 3868 { 3869 const TCGHelperInfo *info = tcg_call_info(op); 3870 int call_flags = tcg_call_flags(op); 3871 3872 nb_oargs = TCGOP_CALLO(op); 3873 nb_iargs = TCGOP_CALLI(op); 3874 3875 /* pure functions can be removed if their result is unused */ 3876 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3877 for (i = 0; i < nb_oargs; i++) { 3878 ts = arg_temp(op->args[i]); 3879 if (ts->state != TS_DEAD) { 3880 goto do_not_remove_call; 3881 } 3882 } 3883 goto do_remove; 3884 } 3885 do_not_remove_call: 3886 3887 /* Output args are dead. */ 3888 for (i = 0; i < nb_oargs; i++) { 3889 ts = arg_temp(op->args[i]); 3890 if (ts->state & TS_DEAD) { 3891 arg_life |= DEAD_ARG << i; 3892 } 3893 if (ts->state & TS_MEM) { 3894 arg_life |= SYNC_ARG << i; 3895 } 3896 ts->state = TS_DEAD; 3897 la_reset_pref(ts); 3898 } 3899 3900 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3901 memset(op->output_pref, 0, sizeof(op->output_pref)); 3902 3903 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3904 TCG_CALL_NO_READ_GLOBALS))) { 3905 la_global_kill(s, nb_globals); 3906 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3907 la_global_sync(s, nb_globals); 3908 } 3909 3910 /* Record arguments that die in this helper. */ 3911 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3912 ts = arg_temp(op->args[i]); 3913 if (ts->state & TS_DEAD) { 3914 arg_life |= DEAD_ARG << i; 3915 } 3916 } 3917 3918 /* For all live registers, remove call-clobbered prefs. */ 3919 la_cross_call(s, nb_temps); 3920 3921 /* 3922 * Input arguments are live for preceding opcodes. 3923 * 3924 * For those arguments that die, and will be allocated in 3925 * registers, clear the register set for that arg, to be 3926 * filled in below. For args that will be on the stack, 3927 * reset to any available reg. Process arguments in reverse 3928 * order so that if a temp is used more than once, the stack 3929 * reset to max happens before the register reset to 0. 3930 */ 3931 for (i = nb_iargs - 1; i >= 0; i--) { 3932 const TCGCallArgumentLoc *loc = &info->in[i]; 3933 ts = arg_temp(op->args[nb_oargs + i]); 3934 3935 if (ts->state & TS_DEAD) { 3936 switch (loc->kind) { 3937 case TCG_CALL_ARG_NORMAL: 3938 case TCG_CALL_ARG_EXTEND_U: 3939 case TCG_CALL_ARG_EXTEND_S: 3940 if (arg_slot_reg_p(loc->arg_slot)) { 3941 *la_temp_pref(ts) = 0; 3942 break; 3943 } 3944 /* fall through */ 3945 default: 3946 *la_temp_pref(ts) = 3947 tcg_target_available_regs[ts->type]; 3948 break; 3949 } 3950 ts->state &= ~TS_DEAD; 3951 } 3952 } 3953 3954 /* 3955 * For each input argument, add its input register to prefs. 3956 * If a temp is used once, this produces a single set bit; 3957 * if a temp is used multiple times, this produces a set. 
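* Seeding the preference with the fixed argument register lets the
* allocator place the value there directly, avoiding a move at the
* call site when nothing else conflicts.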
3958 */ 3959 for (i = 0; i < nb_iargs; i++) { 3960 const TCGCallArgumentLoc *loc = &info->in[i]; 3961 ts = arg_temp(op->args[nb_oargs + i]); 3962 3963 switch (loc->kind) { 3964 case TCG_CALL_ARG_NORMAL: 3965 case TCG_CALL_ARG_EXTEND_U: 3966 case TCG_CALL_ARG_EXTEND_S: 3967 if (arg_slot_reg_p(loc->arg_slot)) { 3968 tcg_regset_set_reg(*la_temp_pref(ts), 3969 tcg_target_call_iarg_regs[loc->arg_slot]); 3970 } 3971 break; 3972 default: 3973 break; 3974 } 3975 } 3976 } 3977 break; 3978 case INDEX_op_insn_start: 3979 break; 3980 case INDEX_op_discard: 3981 /* mark the temporary as dead */ 3982 ts = arg_temp(op->args[0]); 3983 ts->state = TS_DEAD; 3984 la_reset_pref(ts); 3985 break; 3986 3987 case INDEX_op_add2_i32: 3988 case INDEX_op_add2_i64: 3989 opc_new = INDEX_op_add; 3990 goto do_addsub2; 3991 case INDEX_op_sub2_i32: 3992 case INDEX_op_sub2_i64: 3993 opc_new = INDEX_op_sub; 3994 do_addsub2: 3995 nb_iargs = 4; 3996 nb_oargs = 2; 3997 /* Test if the high part of the operation is dead, but not 3998 the low part. The result can be optimized to a simple 3999 add or sub. This happens often for x86_64 guest when the 4000 cpu mode is set to 32 bit. */ 4001 if (arg_temp(op->args[1])->state == TS_DEAD) { 4002 if (arg_temp(op->args[0])->state == TS_DEAD) { 4003 goto do_remove; 4004 } 4005 /* Replace the opcode and adjust the args in place, 4006 leaving 3 unused args at the end. */ 4007 op->opc = opc = opc_new; 4008 op->args[1] = op->args[2]; 4009 op->args[2] = op->args[4]; 4010 /* Fall through and mark the single-word operation live. */ 4011 nb_iargs = 2; 4012 nb_oargs = 1; 4013 } 4014 goto do_not_remove; 4015 4016 case INDEX_op_muls2_i32: 4017 case INDEX_op_muls2_i64: 4018 opc_new = INDEX_op_mul; 4019 opc_new2 = INDEX_op_mulsh; 4020 goto do_mul2; 4021 case INDEX_op_mulu2_i32: 4022 case INDEX_op_mulu2_i64: 4023 opc_new = INDEX_op_mul; 4024 opc_new2 = INDEX_op_muluh; 4025 do_mul2: 4026 nb_iargs = 2; 4027 nb_oargs = 2; 4028 if (arg_temp(op->args[1])->state == TS_DEAD) { 4029 if (arg_temp(op->args[0])->state == TS_DEAD) { 4030 /* Both parts of the operation are dead. */ 4031 goto do_remove; 4032 } 4033 /* The high part of the operation is dead; generate the low. */ 4034 op->opc = opc = opc_new; 4035 op->args[1] = op->args[2]; 4036 op->args[2] = op->args[3]; 4037 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4038 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4039 /* The low part of the operation is dead; generate the high. */ 4040 op->opc = opc = opc_new2; 4041 op->args[0] = op->args[1]; 4042 op->args[1] = op->args[2]; 4043 op->args[2] = op->args[3]; 4044 } else { 4045 goto do_not_remove; 4046 } 4047 /* Mark the single-word operation live. */ 4048 nb_oargs = 1; 4049 goto do_not_remove; 4050 4051 default: 4052 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 4053 nb_iargs = def->nb_iargs; 4054 nb_oargs = def->nb_oargs; 4055 4056 /* Test if the operation can be removed because all 4057 its outputs are dead. We assume that nb_oargs == 0 4058 implies side effects */ 4059 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 4060 for (i = 0; i < nb_oargs; i++) { 4061 if (arg_temp(op->args[i])->state != TS_DEAD) { 4062 goto do_not_remove; 4063 } 4064 } 4065 goto do_remove; 4066 } 4067 goto do_not_remove; 4068 4069 do_remove: 4070 tcg_op_remove(s, op); 4071 break; 4072 4073 do_not_remove: 4074 for (i = 0; i < nb_oargs; i++) { 4075 ts = arg_temp(op->args[i]); 4076 4077 /* Remember the preference of the uses that followed. 
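Since the op list is walked backwards, *la_temp_pref(ts) at this point summarizes every later use of this output; it is saved into op->output_pref here and consulted by the register allocator when choosing the output register.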
*/ 4078 if (i < ARRAY_SIZE(op->output_pref)) { 4079 op->output_pref[i] = *la_temp_pref(ts); 4080 } 4081 4082 /* Output args are dead. */ 4083 if (ts->state & TS_DEAD) { 4084 arg_life |= DEAD_ARG << i; 4085 } 4086 if (ts->state & TS_MEM) { 4087 arg_life |= SYNC_ARG << i; 4088 } 4089 ts->state = TS_DEAD; 4090 la_reset_pref(ts); 4091 } 4092 4093 /* If end of basic block, update. */ 4094 if (def->flags & TCG_OPF_BB_EXIT) { 4095 la_func_end(s, nb_globals, nb_temps); 4096 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4097 la_bb_sync(s, nb_globals, nb_temps); 4098 } else if (def->flags & TCG_OPF_BB_END) { 4099 la_bb_end(s, nb_globals, nb_temps); 4100 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4101 la_global_sync(s, nb_globals); 4102 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4103 la_cross_call(s, nb_temps); 4104 } 4105 } 4106 4107 /* Record arguments that die in this opcode. */ 4108 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4109 ts = arg_temp(op->args[i]); 4110 if (ts->state & TS_DEAD) { 4111 arg_life |= DEAD_ARG << i; 4112 } 4113 } 4114 4115 /* Input arguments are live for preceding opcodes. */ 4116 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4117 ts = arg_temp(op->args[i]); 4118 if (ts->state & TS_DEAD) { 4119 /* For operands that were dead, initially allow 4120 all regs for the type. */ 4121 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4122 ts->state &= ~TS_DEAD; 4123 } 4124 } 4125 4126 /* Incorporate constraints for this operand. */ 4127 switch (opc) { 4128 case INDEX_op_mov: 4129 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4130 have proper constraints. That said, special case 4131 moves to propagate preferences backward. */ 4132 if (IS_DEAD_ARG(1)) { 4133 *la_temp_pref(arg_temp(op->args[0])) 4134 = *la_temp_pref(arg_temp(op->args[1])); 4135 } 4136 break; 4137 4138 default: 4139 args_ct = opcode_args_ct(op); 4140 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4141 const TCGArgConstraint *ct = &args_ct[i]; 4142 TCGRegSet set, *pset; 4143 4144 ts = arg_temp(op->args[i]); 4145 pset = la_temp_pref(ts); 4146 set = *pset; 4147 4148 set &= ct->regs; 4149 if (ct->ialias) { 4150 set &= output_pref(op, ct->alias_index); 4151 } 4152 /* If the combination is not possible, restart. */ 4153 if (set == 0) { 4154 set = ct->regs; 4155 } 4156 *pset = set; 4157 } 4158 break; 4159 } 4160 break; 4161 } 4162 op->life = arg_life; 4163 } 4164 } 4165 4166 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4167 static bool __attribute__((noinline)) 4168 liveness_pass_2(TCGContext *s) 4169 { 4170 int nb_globals = s->nb_globals; 4171 int nb_temps, i; 4172 bool changes = false; 4173 TCGOp *op, *op_next; 4174 4175 /* Create a temporary for each indirect global. */ 4176 for (i = 0; i < nb_globals; ++i) { 4177 TCGTemp *its = &s->temps[i]; 4178 if (its->indirect_reg) { 4179 TCGTemp *dts = tcg_temp_alloc(s); 4180 dts->type = its->type; 4181 dts->base_type = its->base_type; 4182 dts->temp_subindex = its->temp_subindex; 4183 dts->kind = TEMP_EBB; 4184 its->state_ptr = dts; 4185 } else { 4186 its->state_ptr = NULL; 4187 } 4188 /* All globals begin dead. 
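That is, the shadow direct temp (if any) does not yet hold the global's value; the walk below inserts a load before the first use after each such point, and a store whenever the value must be synced back to memory.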
*/ 4189 its->state = TS_DEAD; 4190 } 4191 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4192 TCGTemp *its = &s->temps[i]; 4193 its->state_ptr = NULL; 4194 its->state = TS_DEAD; 4195 } 4196 4197 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4198 TCGOpcode opc = op->opc; 4199 const TCGOpDef *def = &tcg_op_defs[opc]; 4200 TCGLifeData arg_life = op->life; 4201 int nb_iargs, nb_oargs, call_flags; 4202 TCGTemp *arg_ts, *dir_ts; 4203 4204 if (opc == INDEX_op_call) { 4205 nb_oargs = TCGOP_CALLO(op); 4206 nb_iargs = TCGOP_CALLI(op); 4207 call_flags = tcg_call_flags(op); 4208 } else { 4209 nb_iargs = def->nb_iargs; 4210 nb_oargs = def->nb_oargs; 4211 4212 /* Set flags similar to how calls require. */ 4213 if (def->flags & TCG_OPF_COND_BRANCH) { 4214 /* Like reading globals: sync_globals */ 4215 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4216 } else if (def->flags & TCG_OPF_BB_END) { 4217 /* Like writing globals: save_globals */ 4218 call_flags = 0; 4219 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4220 /* Like reading globals: sync_globals */ 4221 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4222 } else { 4223 /* No effect on globals. */ 4224 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4225 TCG_CALL_NO_WRITE_GLOBALS); 4226 } 4227 } 4228 4229 /* Make sure that input arguments are available. */ 4230 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4231 arg_ts = arg_temp(op->args[i]); 4232 dir_ts = arg_ts->state_ptr; 4233 if (dir_ts && arg_ts->state == TS_DEAD) { 4234 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4235 ? INDEX_op_ld_i32 4236 : INDEX_op_ld_i64); 4237 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4238 arg_ts->type, 3); 4239 4240 lop->args[0] = temp_arg(dir_ts); 4241 lop->args[1] = temp_arg(arg_ts->mem_base); 4242 lop->args[2] = arg_ts->mem_offset; 4243 4244 /* Loaded, but synced with memory. */ 4245 arg_ts->state = TS_MEM; 4246 } 4247 } 4248 4249 /* Perform input replacement, and mark inputs that became dead. 4250 No action is required except keeping temp_state up to date 4251 so that we reload when needed. */ 4252 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4253 arg_ts = arg_temp(op->args[i]); 4254 dir_ts = arg_ts->state_ptr; 4255 if (dir_ts) { 4256 op->args[i] = temp_arg(dir_ts); 4257 changes = true; 4258 if (IS_DEAD_ARG(i)) { 4259 arg_ts->state = TS_DEAD; 4260 } 4261 } 4262 } 4263 4264 /* Liveness analysis should ensure that the following are 4265 all correct, for call sites and basic block end points. */ 4266 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4267 /* Nothing to do */ 4268 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4269 for (i = 0; i < nb_globals; ++i) { 4270 /* Liveness should see that globals are synced back, 4271 that is, either TS_DEAD or TS_MEM. */ 4272 arg_ts = &s->temps[i]; 4273 tcg_debug_assert(arg_ts->state_ptr == 0 4274 || arg_ts->state != 0); 4275 } 4276 } else { 4277 for (i = 0; i < nb_globals; ++i) { 4278 /* Liveness should see that globals are saved back, 4279 that is, TS_DEAD, waiting to be reloaded. */ 4280 arg_ts = &s->temps[i]; 4281 tcg_debug_assert(arg_ts->state_ptr == 0 4282 || arg_ts->state == TS_DEAD); 4283 } 4284 } 4285 4286 /* Outputs become available. */ 4287 if (opc == INDEX_op_mov) { 4288 arg_ts = arg_temp(op->args[0]); 4289 dir_ts = arg_ts->state_ptr; 4290 if (dir_ts) { 4291 op->args[0] = temp_arg(dir_ts); 4292 changes = true; 4293 4294 /* The output is now live and modified. */ 4295 arg_ts->state = 0; 4296 4297 if (NEED_SYNC_ARG(0)) { 4298 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4299 ? 
INDEX_op_st_i32 4300 : INDEX_op_st_i64); 4301 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4302 arg_ts->type, 3); 4303 TCGTemp *out_ts = dir_ts; 4304 4305 if (IS_DEAD_ARG(0)) { 4306 out_ts = arg_temp(op->args[1]); 4307 arg_ts->state = TS_DEAD; 4308 tcg_op_remove(s, op); 4309 } else { 4310 arg_ts->state = TS_MEM; 4311 } 4312 4313 sop->args[0] = temp_arg(out_ts); 4314 sop->args[1] = temp_arg(arg_ts->mem_base); 4315 sop->args[2] = arg_ts->mem_offset; 4316 } else { 4317 tcg_debug_assert(!IS_DEAD_ARG(0)); 4318 } 4319 } 4320 } else { 4321 for (i = 0; i < nb_oargs; i++) { 4322 arg_ts = arg_temp(op->args[i]); 4323 dir_ts = arg_ts->state_ptr; 4324 if (!dir_ts) { 4325 continue; 4326 } 4327 op->args[i] = temp_arg(dir_ts); 4328 changes = true; 4329 4330 /* The output is now live and modified. */ 4331 arg_ts->state = 0; 4332 4333 /* Sync outputs upon their last write. */ 4334 if (NEED_SYNC_ARG(i)) { 4335 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4336 ? INDEX_op_st_i32 4337 : INDEX_op_st_i64); 4338 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4339 arg_ts->type, 3); 4340 4341 sop->args[0] = temp_arg(dir_ts); 4342 sop->args[1] = temp_arg(arg_ts->mem_base); 4343 sop->args[2] = arg_ts->mem_offset; 4344 4345 arg_ts->state = TS_MEM; 4346 } 4347 /* Drop outputs that are dead. */ 4348 if (IS_DEAD_ARG(i)) { 4349 arg_ts->state = TS_DEAD; 4350 } 4351 } 4352 } 4353 } 4354 4355 return changes; 4356 } 4357 4358 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4359 { 4360 intptr_t off; 4361 int size, align; 4362 4363 /* When allocating an object, look at the full type. */ 4364 size = tcg_type_size(ts->base_type); 4365 switch (ts->base_type) { 4366 case TCG_TYPE_I32: 4367 align = 4; 4368 break; 4369 case TCG_TYPE_I64: 4370 case TCG_TYPE_V64: 4371 align = 8; 4372 break; 4373 case TCG_TYPE_I128: 4374 case TCG_TYPE_V128: 4375 case TCG_TYPE_V256: 4376 /* 4377 * Note that we do not require aligned storage for V256, 4378 * and that we provide alignment for I128 to match V128, 4379 * even if that's above what the host ABI requires. 4380 */ 4381 align = 16; 4382 break; 4383 default: 4384 g_assert_not_reached(); 4385 } 4386 4387 /* 4388 * Assume the stack is sufficiently aligned. 4389 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4390 * and do not require 16 byte vector alignment. This seems slightly 4391 * easier than fully parameterizing the above switch statement. 4392 */ 4393 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4394 off = ROUND_UP(s->current_frame_offset, align); 4395 4396 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4397 if (off + size > s->frame_end) { 4398 tcg_raise_tb_overflow(s); 4399 } 4400 s->current_frame_offset = off + size; 4401 #if defined(__sparc__) 4402 off += TCG_TARGET_STACK_BIAS; 4403 #endif 4404 4405 /* If the object was subdivided, assign memory to all the parts. */ 4406 if (ts->base_type != ts->type) { 4407 int part_size = tcg_type_size(ts->type); 4408 int part_count = size / part_size; 4409 4410 /* 4411 * Each part is allocated sequentially in tcg_temp_new_internal. 4412 * Jump back to the first part by subtracting the current index. 4413 */ 4414 ts -= ts->temp_subindex; 4415 for (int i = 0; i < part_count; ++i) { 4416 ts[i].mem_offset = off + i * part_size; 4417 ts[i].mem_base = s->frame_temp; 4418 ts[i].mem_allocated = 1; 4419 } 4420 } else { 4421 ts->mem_offset = off; 4422 ts->mem_base = s->frame_temp; 4423 ts->mem_allocated = 1; 4424 } 4425 } 4426 4427 /* Assign @reg to @ts, and update reg_to_temp[]. 
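The invariant is that a temp occupies at most one host register and a register holds at most one temp; the asserts below check both directions before the links are rewritten.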
*/ 4428 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4429 { 4430 if (ts->val_type == TEMP_VAL_REG) { 4431 TCGReg old = ts->reg; 4432 tcg_debug_assert(s->reg_to_temp[old] == ts); 4433 if (old == reg) { 4434 return; 4435 } 4436 s->reg_to_temp[old] = NULL; 4437 } 4438 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4439 s->reg_to_temp[reg] = ts; 4440 ts->val_type = TEMP_VAL_REG; 4441 ts->reg = reg; 4442 } 4443 4444 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4445 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4446 { 4447 tcg_debug_assert(type != TEMP_VAL_REG); 4448 if (ts->val_type == TEMP_VAL_REG) { 4449 TCGReg reg = ts->reg; 4450 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4451 s->reg_to_temp[reg] = NULL; 4452 } 4453 ts->val_type = type; 4454 } 4455 4456 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4457 4458 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4459 mark it free; otherwise mark it dead. */ 4460 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4461 { 4462 TCGTempVal new_type; 4463 4464 switch (ts->kind) { 4465 case TEMP_FIXED: 4466 return; 4467 case TEMP_GLOBAL: 4468 case TEMP_TB: 4469 new_type = TEMP_VAL_MEM; 4470 break; 4471 case TEMP_EBB: 4472 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4473 break; 4474 case TEMP_CONST: 4475 new_type = TEMP_VAL_CONST; 4476 break; 4477 default: 4478 g_assert_not_reached(); 4479 } 4480 set_temp_val_nonreg(s, ts, new_type); 4481 } 4482 4483 /* Mark a temporary as dead. */ 4484 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4485 { 4486 temp_free_or_dead(s, ts, 1); 4487 } 4488 4489 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4490 registers needs to be allocated to store a constant. If 'free_or_dead' 4491 is non-zero, subsequently release the temporary; if it is positive, the 4492 temp is dead; if it is negative, the temp is free. */ 4493 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4494 TCGRegSet preferred_regs, int free_or_dead) 4495 { 4496 if (!temp_readonly(ts) && !ts->mem_coherent) { 4497 if (!ts->mem_allocated) { 4498 temp_allocate_frame(s, ts); 4499 } 4500 switch (ts->val_type) { 4501 case TEMP_VAL_CONST: 4502 /* If we're going to free the temp immediately, then we won't 4503 require it later in a register, so attempt to store the 4504 constant to memory directly. */ 4505 if (free_or_dead 4506 && tcg_out_sti(s, ts->type, ts->val, 4507 ts->mem_base->reg, ts->mem_offset)) { 4508 break; 4509 } 4510 temp_load(s, ts, tcg_target_available_regs[ts->type], 4511 allocated_regs, preferred_regs); 4512 /* fallthrough */ 4513 4514 case TEMP_VAL_REG: 4515 tcg_out_st(s, ts->type, ts->reg, 4516 ts->mem_base->reg, ts->mem_offset); 4517 break; 4518 4519 case TEMP_VAL_MEM: 4520 break; 4521 4522 case TEMP_VAL_DEAD: 4523 default: 4524 g_assert_not_reached(); 4525 } 4526 ts->mem_coherent = 1; 4527 } 4528 if (free_or_dead) { 4529 temp_free_or_dead(s, ts, free_or_dead); 4530 } 4531 } 4532 4533 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4534 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4535 { 4536 TCGTemp *ts = s->reg_to_temp[reg]; 4537 if (ts != NULL) { 4538 temp_sync(s, ts, allocated_regs, 0, -1); 4539 } 4540 } 4541 4542 /** 4543 * tcg_reg_alloc: 4544 * @required_regs: Set of registers in which we must allocate. 
4545 * @allocated_regs: Set of registers which must be avoided. 4546 * @preferred_regs: Set of registers we should prefer. 4547 * @rev: True if we search the registers in "indirect" order. 4548 * 4549 * The allocated register must be in @required_regs & ~@allocated_regs, 4550 * but if we can put it in @preferred_regs we may save a move later. 4551 */ 4552 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4553 TCGRegSet allocated_regs, 4554 TCGRegSet preferred_regs, bool rev) 4555 { 4556 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4557 TCGRegSet reg_ct[2]; 4558 const int *order; 4559 4560 reg_ct[1] = required_regs & ~allocated_regs; 4561 tcg_debug_assert(reg_ct[1] != 0); 4562 reg_ct[0] = reg_ct[1] & preferred_regs; 4563 4564 /* Skip the preferred_regs option if it cannot be satisfied, 4565 or if the preference made no difference. */ 4566 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4567 4568 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4569 4570 /* Try free registers, preferences first. */ 4571 for (j = f; j < 2; j++) { 4572 TCGRegSet set = reg_ct[j]; 4573 4574 if (tcg_regset_single(set)) { 4575 /* One register in the set. */ 4576 TCGReg reg = tcg_regset_first(set); 4577 if (s->reg_to_temp[reg] == NULL) { 4578 return reg; 4579 } 4580 } else { 4581 for (i = 0; i < n; i++) { 4582 TCGReg reg = order[i]; 4583 if (s->reg_to_temp[reg] == NULL && 4584 tcg_regset_test_reg(set, reg)) { 4585 return reg; 4586 } 4587 } 4588 } 4589 } 4590 4591 /* We must spill something. */ 4592 for (j = f; j < 2; j++) { 4593 TCGRegSet set = reg_ct[j]; 4594 4595 if (tcg_regset_single(set)) { 4596 /* One register in the set. */ 4597 TCGReg reg = tcg_regset_first(set); 4598 tcg_reg_free(s, reg, allocated_regs); 4599 return reg; 4600 } else { 4601 for (i = 0; i < n; i++) { 4602 TCGReg reg = order[i]; 4603 if (tcg_regset_test_reg(set, reg)) { 4604 tcg_reg_free(s, reg, allocated_regs); 4605 return reg; 4606 } 4607 } 4608 } 4609 } 4610 4611 g_assert_not_reached(); 4612 } 4613 4614 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4615 TCGRegSet allocated_regs, 4616 TCGRegSet preferred_regs, bool rev) 4617 { 4618 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4619 TCGRegSet reg_ct[2]; 4620 const int *order; 4621 4622 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4623 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4624 tcg_debug_assert(reg_ct[1] != 0); 4625 reg_ct[0] = reg_ct[1] & preferred_regs; 4626 4627 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4628 4629 /* 4630 * Skip the preferred_regs option if it cannot be satisfied, 4631 * or if the preference made no difference. 4632 */ 4633 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4634 4635 /* 4636 * Minimize the number of flushes by looking for 2 free registers first, 4637 * then a single flush, then two flushes. 4638 */ 4639 for (fmin = 2; fmin >= 0; fmin--) { 4640 for (j = k; j < 2; j++) { 4641 TCGRegSet set = reg_ct[j]; 4642 4643 for (i = 0; i < n; i++) { 4644 TCGReg reg = order[i]; 4645 4646 if (tcg_regset_test_reg(set, reg)) { 4647 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4648 if (f >= fmin) { 4649 tcg_reg_free(s, reg, allocated_regs); 4650 tcg_reg_free(s, reg + 1, allocated_regs); 4651 return reg; 4652 } 4653 } 4654 } 4655 } 4656 } 4657 g_assert_not_reached(); 4658 } 4659 4660 /* Make sure the temporary is in a register. 
If needed, allocate the register 4661 from DESIRED while avoiding ALLOCATED. */ 4662 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4663 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4664 { 4665 TCGReg reg; 4666 4667 switch (ts->val_type) { 4668 case TEMP_VAL_REG: 4669 return; 4670 case TEMP_VAL_CONST: 4671 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4672 preferred_regs, ts->indirect_base); 4673 if (ts->type <= TCG_TYPE_I64) { 4674 tcg_out_movi(s, ts->type, reg, ts->val); 4675 } else { 4676 uint64_t val = ts->val; 4677 MemOp vece = MO_64; 4678 4679 /* 4680 * Find the minimal vector element that matches the constant. 4681 * The targets will, in general, have to do this search anyway, 4682 * so do this generically. 4683 */ 4684 if (val == dup_const(MO_8, val)) { 4685 vece = MO_8; 4686 } else if (val == dup_const(MO_16, val)) { 4687 vece = MO_16; 4688 } else if (val == dup_const(MO_32, val)) { 4689 vece = MO_32; 4690 } 4691 4692 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4693 } 4694 ts->mem_coherent = 0; 4695 break; 4696 case TEMP_VAL_MEM: 4697 if (!ts->mem_allocated) { 4698 temp_allocate_frame(s, ts); 4699 } 4700 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4701 preferred_regs, ts->indirect_base); 4702 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4703 ts->mem_coherent = 1; 4704 break; 4705 case TEMP_VAL_DEAD: 4706 default: 4707 g_assert_not_reached(); 4708 } 4709 set_temp_val_reg(s, ts, reg); 4710 } 4711 4712 /* Save a temporary to memory. 'allocated_regs' is used in case a 4713 temporary register needs to be allocated to store a constant. */ 4714 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4715 { 4716 /* The liveness analysis already ensures that globals are back 4717 in memory. Keep a tcg_debug_assert for safety. */ 4718 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4719 } 4720 4721 /* save globals to their canonical location and assume they can be 4722 modified by the following code. 'allocated_regs' is used in case a 4723 temporary register needs to be allocated to store a constant. */ 4724 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4725 { 4726 int i, n; 4727 4728 for (i = 0, n = s->nb_globals; i < n; i++) { 4729 temp_save(s, &s->temps[i], allocated_regs); 4730 } 4731 } 4732 4733 /* sync globals to their canonical location and assume they can be 4734 read by the following code. 'allocated_regs' is used in case a 4735 temporary register needs to be allocated to store a constant. */ 4736 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4737 { 4738 int i, n; 4739 4740 for (i = 0, n = s->nb_globals; i < n; i++) { 4741 TCGTemp *ts = &s->temps[i]; 4742 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4743 || ts->kind == TEMP_FIXED 4744 || ts->mem_coherent); 4745 } 4746 } 4747 4748 /* at the end of a basic block, we assume all temporaries are dead and 4749 all globals are stored at their canonical location. */ 4750 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4751 { 4752 int i; 4753 4754 for (i = s->nb_globals; i < s->nb_temps; i++) { 4755 TCGTemp *ts = &s->temps[i]; 4756 4757 switch (ts->kind) { 4758 case TEMP_TB: 4759 temp_save(s, ts, allocated_regs); 4760 break; 4761 case TEMP_EBB: 4762 /* The liveness analysis already ensures that temps are dead. 4763 Keep a tcg_debug_assert for safety.
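TEMP_EBB values do not survive past the end of an extended basic block, so there is nothing to store for them here.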
*/ 4764 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4765 break; 4766 case TEMP_CONST: 4767 /* Similarly, we should have freed any allocated register. */ 4768 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4769 break; 4770 default: 4771 g_assert_not_reached(); 4772 } 4773 } 4774 4775 save_globals(s, allocated_regs); 4776 } 4777 4778 /* 4779 * At a conditional branch, we assume all temporaries are dead unless 4780 * explicitly live-across-conditional-branch; all globals and local 4781 * temps are synced to their location. 4782 */ 4783 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4784 { 4785 sync_globals(s, allocated_regs); 4786 4787 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4788 TCGTemp *ts = &s->temps[i]; 4789 /* 4790 * The liveness analysis already ensures that temps are dead. 4791 * Keep tcg_debug_asserts for safety. 4792 */ 4793 switch (ts->kind) { 4794 case TEMP_TB: 4795 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4796 break; 4797 case TEMP_EBB: 4798 case TEMP_CONST: 4799 break; 4800 default: 4801 g_assert_not_reached(); 4802 } 4803 } 4804 } 4805 4806 /* 4807 * Specialized code generation for INDEX_op_mov_* with a constant. 4808 */ 4809 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4810 tcg_target_ulong val, TCGLifeData arg_life, 4811 TCGRegSet preferred_regs) 4812 { 4813 /* ENV should not be modified. */ 4814 tcg_debug_assert(!temp_readonly(ots)); 4815 4816 /* The movi is not explicitly generated here. */ 4817 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4818 ots->val = val; 4819 ots->mem_coherent = 0; 4820 if (NEED_SYNC_ARG(0)) { 4821 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4822 } else if (IS_DEAD_ARG(0)) { 4823 temp_dead(s, ots); 4824 } 4825 } 4826 4827 /* 4828 * Specialized code generation for INDEX_op_mov_*. 4829 */ 4830 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4831 { 4832 const TCGLifeData arg_life = op->life; 4833 TCGRegSet allocated_regs, preferred_regs; 4834 TCGTemp *ts, *ots; 4835 TCGType otype, itype; 4836 TCGReg oreg, ireg; 4837 4838 allocated_regs = s->reserved_regs; 4839 preferred_regs = output_pref(op, 0); 4840 ots = arg_temp(op->args[0]); 4841 ts = arg_temp(op->args[1]); 4842 4843 /* ENV should not be modified. */ 4844 tcg_debug_assert(!temp_readonly(ots)); 4845 4846 /* Note that otype != itype for no-op truncation. */ 4847 otype = ots->type; 4848 itype = ts->type; 4849 4850 if (ts->val_type == TEMP_VAL_CONST) { 4851 /* propagate constant or generate sti */ 4852 tcg_target_ulong val = ts->val; 4853 if (IS_DEAD_ARG(1)) { 4854 temp_dead(s, ts); 4855 } 4856 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4857 return; 4858 } 4859 4860 /* If the source value is in memory we're going to be forced 4861 to have it in a register in order to perform the copy. Copy 4862 the SOURCE value into its own register first, that way we 4863 don't have to reload SOURCE the next time it is used. */ 4864 if (ts->val_type == TEMP_VAL_MEM) { 4865 temp_load(s, ts, tcg_target_available_regs[itype], 4866 allocated_regs, preferred_regs); 4867 } 4868 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4869 ireg = ts->reg; 4870 4871 if (IS_DEAD_ARG(0)) { 4872 /* mov to a non-saved dead register makes no sense (even with 4873 liveness analysis disabled). 
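The output must therefore at least be synced: store the input value directly into the output's canonical memory slot and mark the output dead, skipping the register copy entirely.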
*/ 4874 tcg_debug_assert(NEED_SYNC_ARG(0)); 4875 if (!ots->mem_allocated) { 4876 temp_allocate_frame(s, ots); 4877 } 4878 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4879 if (IS_DEAD_ARG(1)) { 4880 temp_dead(s, ts); 4881 } 4882 temp_dead(s, ots); 4883 return; 4884 } 4885 4886 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4887 /* 4888 * The mov can be suppressed. Kill input first, so that it 4889 * is unlinked from reg_to_temp, then set the output to the 4890 * reg that we saved from the input. 4891 */ 4892 temp_dead(s, ts); 4893 oreg = ireg; 4894 } else { 4895 if (ots->val_type == TEMP_VAL_REG) { 4896 oreg = ots->reg; 4897 } else { 4898 /* Make sure to not spill the input register during allocation. */ 4899 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4900 allocated_regs | ((TCGRegSet)1 << ireg), 4901 preferred_regs, ots->indirect_base); 4902 } 4903 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4904 /* 4905 * Cross register class move not supported. 4906 * Store the source register into the destination slot 4907 * and leave the destination temp as TEMP_VAL_MEM. 4908 */ 4909 assert(!temp_readonly(ots)); 4910 if (!ts->mem_allocated) { 4911 temp_allocate_frame(s, ots); 4912 } 4913 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4914 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4915 ots->mem_coherent = 1; 4916 return; 4917 } 4918 } 4919 set_temp_val_reg(s, ots, oreg); 4920 ots->mem_coherent = 0; 4921 4922 if (NEED_SYNC_ARG(0)) { 4923 temp_sync(s, ots, allocated_regs, 0, 0); 4924 } 4925 } 4926 4927 /* 4928 * Specialized code generation for INDEX_op_dup_vec. 4929 */ 4930 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4931 { 4932 const TCGLifeData arg_life = op->life; 4933 TCGRegSet dup_out_regs, dup_in_regs; 4934 const TCGArgConstraint *dup_args_ct; 4935 TCGTemp *its, *ots; 4936 TCGType itype, vtype; 4937 unsigned vece; 4938 int lowpart_ofs; 4939 bool ok; 4940 4941 ots = arg_temp(op->args[0]); 4942 its = arg_temp(op->args[1]); 4943 4944 /* ENV should not be modified. */ 4945 tcg_debug_assert(!temp_readonly(ots)); 4946 4947 itype = its->type; 4948 vece = TCGOP_VECE(op); 4949 vtype = TCGOP_TYPE(op); 4950 4951 if (its->val_type == TEMP_VAL_CONST) { 4952 /* Propagate constant via movi -> dupi. */ 4953 tcg_target_ulong val = its->val; 4954 if (IS_DEAD_ARG(1)) { 4955 temp_dead(s, its); 4956 } 4957 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4958 return; 4959 } 4960 4961 dup_args_ct = opcode_args_ct(op); 4962 dup_out_regs = dup_args_ct[0].regs; 4963 dup_in_regs = dup_args_ct[1].regs; 4964 4965 /* Allocate the output register now. */ 4966 if (ots->val_type != TEMP_VAL_REG) { 4967 TCGRegSet allocated_regs = s->reserved_regs; 4968 TCGReg oreg; 4969 4970 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4971 /* Make sure to not spill the input register. */ 4972 tcg_regset_set_reg(allocated_regs, its->reg); 4973 } 4974 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4975 output_pref(op, 0), ots->indirect_base); 4976 set_temp_val_reg(s, ots, oreg); 4977 } 4978 4979 switch (its->val_type) { 4980 case TEMP_VAL_REG: 4981 /* 4982 * The dup constraints must be broad, covering all possible VECE. 4983 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 4984 * to fail, indicating that extra moves are required for that case.
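* (For instance, a host might support dup from a general register only for
* 32-bit elements; for an MO_8 dup it would return false here and we fall
* back to an integer-to-vector move or a dupm from the temp's memory slot.)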
4985 */ 4986 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4987 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4988 goto done; 4989 } 4990 /* Try again from memory or a vector input register. */ 4991 } 4992 if (!its->mem_coherent) { 4993 /* 4994 * The input register is not synced, and so an extra store 4995 * would be required to use memory. Attempt an integer-vector 4996 * register move first. We do not have a TCGRegSet for this. 4997 */ 4998 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4999 break; 5000 } 5001 /* Sync the temp back to its slot and load from there. */ 5002 temp_sync(s, its, s->reserved_regs, 0, 0); 5003 } 5004 /* fall through */ 5005 5006 case TEMP_VAL_MEM: 5007 lowpart_ofs = 0; 5008 if (HOST_BIG_ENDIAN) { 5009 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5010 } 5011 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5012 its->mem_offset + lowpart_ofs)) { 5013 goto done; 5014 } 5015 /* Load the input into the destination vector register. */ 5016 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5017 break; 5018 5019 default: 5020 g_assert_not_reached(); 5021 } 5022 5023 /* We now have a vector input register, so dup must succeed. */ 5024 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5025 tcg_debug_assert(ok); 5026 5027 done: 5028 ots->mem_coherent = 0; 5029 if (IS_DEAD_ARG(1)) { 5030 temp_dead(s, its); 5031 } 5032 if (NEED_SYNC_ARG(0)) { 5033 temp_sync(s, ots, s->reserved_regs, 0, 0); 5034 } 5035 if (IS_DEAD_ARG(0)) { 5036 temp_dead(s, ots); 5037 } 5038 } 5039 5040 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5041 { 5042 const TCGLifeData arg_life = op->life; 5043 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5044 TCGRegSet i_allocated_regs; 5045 TCGRegSet o_allocated_regs; 5046 int i, k, nb_iargs, nb_oargs; 5047 TCGReg reg; 5048 TCGArg arg; 5049 const TCGArgConstraint *args_ct; 5050 const TCGArgConstraint *arg_ct; 5051 TCGTemp *ts; 5052 TCGArg new_args[TCG_MAX_OP_ARGS]; 5053 int const_args[TCG_MAX_OP_ARGS]; 5054 TCGCond op_cond; 5055 5056 nb_oargs = def->nb_oargs; 5057 nb_iargs = def->nb_iargs; 5058 5059 /* copy constants */ 5060 memcpy(new_args + nb_oargs + nb_iargs, 5061 op->args + nb_oargs + nb_iargs, 5062 sizeof(TCGArg) * def->nb_cargs); 5063 5064 i_allocated_regs = s->reserved_regs; 5065 o_allocated_regs = s->reserved_regs; 5066 5067 switch (op->opc) { 5068 case INDEX_op_brcond_i32: 5069 case INDEX_op_brcond_i64: 5070 op_cond = op->args[2]; 5071 break; 5072 case INDEX_op_setcond_i32: 5073 case INDEX_op_setcond_i64: 5074 case INDEX_op_negsetcond_i32: 5075 case INDEX_op_negsetcond_i64: 5076 case INDEX_op_cmp_vec: 5077 op_cond = op->args[3]; 5078 break; 5079 case INDEX_op_brcond2_i32: 5080 op_cond = op->args[4]; 5081 break; 5082 case INDEX_op_movcond_i32: 5083 case INDEX_op_movcond_i64: 5084 case INDEX_op_setcond2_i32: 5085 case INDEX_op_cmpsel_vec: 5086 op_cond = op->args[5]; 5087 break; 5088 default: 5089 /* No condition within opcode. 
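Use TCG_COND_ALWAYS as a neutral value; within this function the condition is only consulted by tcg_target_const_match() for constraints that depend on the comparison being performed.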
*/ 5090 op_cond = TCG_COND_ALWAYS; 5091 break; 5092 } 5093 5094 args_ct = opcode_args_ct(op); 5095 5096 /* satisfy input constraints */ 5097 for (k = 0; k < nb_iargs; k++) { 5098 TCGRegSet i_preferred_regs, i_required_regs; 5099 bool allocate_new_reg, copyto_new_reg; 5100 TCGTemp *ts2; 5101 int i1, i2; 5102 5103 i = args_ct[nb_oargs + k].sort_index; 5104 arg = op->args[i]; 5105 arg_ct = &args_ct[i]; 5106 ts = arg_temp(arg); 5107 5108 if (ts->val_type == TEMP_VAL_CONST) { 5109 #ifdef TCG_REG_ZERO 5110 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5111 /* Hardware zero register: indicate register via non-const. */ 5112 const_args[i] = 0; 5113 new_args[i] = TCG_REG_ZERO; 5114 continue; 5115 } 5116 #endif 5117 5118 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5119 op_cond, TCGOP_VECE(op))) { 5120 /* constant is OK for instruction */ 5121 const_args[i] = 1; 5122 new_args[i] = ts->val; 5123 continue; 5124 } 5125 } 5126 5127 reg = ts->reg; 5128 i_preferred_regs = 0; 5129 i_required_regs = arg_ct->regs; 5130 allocate_new_reg = false; 5131 copyto_new_reg = false; 5132 5133 switch (arg_ct->pair) { 5134 case 0: /* not paired */ 5135 if (arg_ct->ialias) { 5136 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5137 5138 /* 5139 * If the input is readonly, then it cannot also be an 5140 * output and aliased to itself. If the input is not 5141 * dead after the instruction, we must allocate a new 5142 * register and move it. 5143 */ 5144 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5145 || args_ct[arg_ct->alias_index].newreg) { 5146 allocate_new_reg = true; 5147 } else if (ts->val_type == TEMP_VAL_REG) { 5148 /* 5149 * Check if the current register has already been 5150 * allocated for another input. 5151 */ 5152 allocate_new_reg = 5153 tcg_regset_test_reg(i_allocated_regs, reg); 5154 } 5155 } 5156 if (!allocate_new_reg) { 5157 temp_load(s, ts, i_required_regs, i_allocated_regs, 5158 i_preferred_regs); 5159 reg = ts->reg; 5160 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5161 } 5162 if (allocate_new_reg) { 5163 /* 5164 * Allocate a new register matching the constraint 5165 * and move the temporary register into it. 5166 */ 5167 temp_load(s, ts, tcg_target_available_regs[ts->type], 5168 i_allocated_regs, 0); 5169 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5170 i_preferred_regs, ts->indirect_base); 5171 copyto_new_reg = true; 5172 } 5173 break; 5174 5175 case 1: 5176 /* First of an input pair; if i1 == i2, the second is an output. */ 5177 i1 = i; 5178 i2 = arg_ct->pair_index; 5179 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5180 5181 /* 5182 * It is easier to default to allocating a new pair 5183 * and to identify a few cases where it's not required. 5184 */ 5185 if (arg_ct->ialias) { 5186 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5187 if (IS_DEAD_ARG(i1) && 5188 IS_DEAD_ARG(i2) && 5189 !temp_readonly(ts) && 5190 ts->val_type == TEMP_VAL_REG && 5191 ts->reg < TCG_TARGET_NB_REGS - 1 && 5192 tcg_regset_test_reg(i_required_regs, reg) && 5193 !tcg_regset_test_reg(i_allocated_regs, reg) && 5194 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5195 (ts2 5196 ? ts2->val_type == TEMP_VAL_REG && 5197 ts2->reg == reg + 1 && 5198 !temp_readonly(ts2) 5199 : s->reg_to_temp[reg + 1] == NULL)) { 5200 break; 5201 } 5202 } else { 5203 /* Without aliasing, the pair must also be an input. 
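Hence ts2 is non-NULL; if both halves already sit in consecutive registers satisfying the constraint, they are used in place without allocating a new pair.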
*/ 5204 tcg_debug_assert(ts2); 5205 if (ts->val_type == TEMP_VAL_REG && 5206 ts2->val_type == TEMP_VAL_REG && 5207 ts2->reg == reg + 1 && 5208 tcg_regset_test_reg(i_required_regs, reg)) { 5209 break; 5210 } 5211 } 5212 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5213 0, ts->indirect_base); 5214 goto do_pair; 5215 5216 case 2: /* pair second */ 5217 reg = new_args[arg_ct->pair_index] + 1; 5218 goto do_pair; 5219 5220 case 3: /* ialias with second output, no first input */ 5221 tcg_debug_assert(arg_ct->ialias); 5222 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5223 5224 if (IS_DEAD_ARG(i) && 5225 !temp_readonly(ts) && 5226 ts->val_type == TEMP_VAL_REG && 5227 reg > 0 && 5228 s->reg_to_temp[reg - 1] == NULL && 5229 tcg_regset_test_reg(i_required_regs, reg) && 5230 !tcg_regset_test_reg(i_allocated_regs, reg) && 5231 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5232 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5233 break; 5234 } 5235 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5236 i_allocated_regs, 0, 5237 ts->indirect_base); 5238 tcg_regset_set_reg(i_allocated_regs, reg); 5239 reg += 1; 5240 goto do_pair; 5241 5242 do_pair: 5243 /* 5244 * If an aliased input is not dead after the instruction, 5245 * we must allocate a new register and move it. 5246 */ 5247 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5248 TCGRegSet t_allocated_regs = i_allocated_regs; 5249 5250 /* 5251 * Because of the alias, and the continued life, make sure 5252 * that the temp is somewhere *other* than the reg pair, 5253 * and we get a copy in reg. 5254 */ 5255 tcg_regset_set_reg(t_allocated_regs, reg); 5256 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5257 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5258 /* If ts was already in reg, copy it somewhere else. */ 5259 TCGReg nr; 5260 bool ok; 5261 5262 tcg_debug_assert(ts->kind != TEMP_FIXED); 5263 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5264 t_allocated_regs, 0, ts->indirect_base); 5265 ok = tcg_out_mov(s, ts->type, nr, reg); 5266 tcg_debug_assert(ok); 5267 5268 set_temp_val_reg(s, ts, nr); 5269 } else { 5270 temp_load(s, ts, tcg_target_available_regs[ts->type], 5271 t_allocated_regs, 0); 5272 copyto_new_reg = true; 5273 } 5274 } else { 5275 /* Preferably allocate to reg, otherwise copy. */ 5276 i_required_regs = (TCGRegSet)1 << reg; 5277 temp_load(s, ts, i_required_regs, i_allocated_regs, 5278 i_preferred_regs); 5279 copyto_new_reg = ts->reg != reg; 5280 } 5281 break; 5282 5283 default: 5284 g_assert_not_reached(); 5285 } 5286 5287 if (copyto_new_reg) { 5288 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5289 /* 5290 * Cross register class move not supported. Sync the 5291 * temp back to its slot and load from there. 5292 */ 5293 temp_sync(s, ts, i_allocated_regs, 0, 0); 5294 tcg_out_ld(s, ts->type, reg, 5295 ts->mem_base->reg, ts->mem_offset); 5296 } 5297 } 5298 new_args[i] = reg; 5299 const_args[i] = 0; 5300 tcg_regset_set_reg(i_allocated_regs, reg); 5301 } 5302 5303 /* mark dead temporaries and free the associated registers */ 5304 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5305 if (IS_DEAD_ARG(i)) { 5306 temp_dead(s, arg_temp(op->args[i])); 5307 } 5308 } 5309 5310 if (def->flags & TCG_OPF_COND_BRANCH) { 5311 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5312 } else if (def->flags & TCG_OPF_BB_END) { 5313 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5314 } else { 5315 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5316 /* XXX: permit generic clobber register list ? 
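For now, an op flagged TCG_OPF_CALL_CLOBBER flushes every call-clobbered register, exactly as a real call would.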
*/ 5317 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5318 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5319 tcg_reg_free(s, i, i_allocated_regs); 5320 } 5321 } 5322 } 5323 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5324 /* sync globals if the op has side effects and might trigger 5325 an exception. */ 5326 sync_globals(s, i_allocated_regs); 5327 } 5328 5329 /* satisfy the output constraints */ 5330 for (k = 0; k < nb_oargs; k++) { 5331 i = args_ct[k].sort_index; 5332 arg = op->args[i]; 5333 arg_ct = &args_ct[i]; 5334 ts = arg_temp(arg); 5335 5336 /* ENV should not be modified. */ 5337 tcg_debug_assert(!temp_readonly(ts)); 5338 5339 switch (arg_ct->pair) { 5340 case 0: /* not paired */ 5341 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5342 reg = new_args[arg_ct->alias_index]; 5343 } else if (arg_ct->newreg) { 5344 reg = tcg_reg_alloc(s, arg_ct->regs, 5345 i_allocated_regs | o_allocated_regs, 5346 output_pref(op, k), ts->indirect_base); 5347 } else { 5348 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5349 output_pref(op, k), ts->indirect_base); 5350 } 5351 break; 5352 5353 case 1: /* first of pair */ 5354 if (arg_ct->oalias) { 5355 reg = new_args[arg_ct->alias_index]; 5356 } else if (arg_ct->newreg) { 5357 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5358 i_allocated_regs | o_allocated_regs, 5359 output_pref(op, k), 5360 ts->indirect_base); 5361 } else { 5362 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5363 output_pref(op, k), 5364 ts->indirect_base); 5365 } 5366 break; 5367 5368 case 2: /* second of pair */ 5369 if (arg_ct->oalias) { 5370 reg = new_args[arg_ct->alias_index]; 5371 } else { 5372 reg = new_args[arg_ct->pair_index] + 1; 5373 } 5374 break; 5375 5376 case 3: /* first of pair, aliasing with a second input */ 5377 tcg_debug_assert(!arg_ct->newreg); 5378 reg = new_args[arg_ct->pair_index] - 1; 5379 break; 5380 5381 default: 5382 g_assert_not_reached(); 5383 } 5384 tcg_regset_set_reg(o_allocated_regs, reg); 5385 set_temp_val_reg(s, ts, reg); 5386 ts->mem_coherent = 0; 5387 new_args[i] = reg; 5388 } 5389 } 5390 5391 /* emit instruction */ 5392 TCGType type = TCGOP_TYPE(op); 5393 switch (op->opc) { 5394 case INDEX_op_ext_i32_i64: 5395 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 5396 break; 5397 case INDEX_op_extu_i32_i64: 5398 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 5399 break; 5400 case INDEX_op_extrl_i64_i32: 5401 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 5402 break; 5403 5404 case INDEX_op_add: 5405 case INDEX_op_and: 5406 case INDEX_op_andc: 5407 case INDEX_op_divs: 5408 case INDEX_op_divu: 5409 case INDEX_op_eqv: 5410 case INDEX_op_mul: 5411 case INDEX_op_mulsh: 5412 case INDEX_op_muluh: 5413 case INDEX_op_nand: 5414 case INDEX_op_nor: 5415 case INDEX_op_or: 5416 case INDEX_op_orc: 5417 case INDEX_op_rems: 5418 case INDEX_op_remu: 5419 case INDEX_op_rotl: 5420 case INDEX_op_rotr: 5421 case INDEX_op_sar: 5422 case INDEX_op_shl: 5423 case INDEX_op_shr: 5424 case INDEX_op_xor: 5425 { 5426 const TCGOutOpBinary *out = 5427 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5428 5429 /* Constants should never appear in the first source operand. 
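A constant left-hand operand is expected to have been folded away or swapped to the right by the optimizer, so only new_args[2] may be constant (emitted via out_rri); otherwise both sources are registers and out_rrr is used.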
*/ 5430 tcg_debug_assert(!const_args[1]); 5431 if (const_args[2]) { 5432 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5433 } else { 5434 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5435 } 5436 } 5437 break; 5438 5439 case INDEX_op_sub: 5440 { 5441 const TCGOutOpSubtract *out = &outop_sub; 5442 5443 /* 5444 * Constants should never appear in the second source operand. 5445 * These are folded to add with negative constant. 5446 */ 5447 tcg_debug_assert(!const_args[2]); 5448 if (const_args[1]) { 5449 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5450 } else { 5451 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5452 } 5453 } 5454 break; 5455 5456 case INDEX_op_neg: 5457 case INDEX_op_not: 5458 { 5459 const TCGOutOpUnary *out = 5460 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5461 5462 /* Constants should have been folded. */ 5463 tcg_debug_assert(!const_args[1]); 5464 out->out_rr(s, type, new_args[0], new_args[1]); 5465 } 5466 break; 5467 5468 case INDEX_op_divs2: 5469 case INDEX_op_divu2: 5470 { 5471 const TCGOutOpDivRem *out = 5472 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5473 5474 /* Only used by x86 and s390x, which use matching constraints. */ 5475 tcg_debug_assert(new_args[0] == new_args[2]); 5476 tcg_debug_assert(new_args[1] == new_args[3]); 5477 tcg_debug_assert(!const_args[4]); 5478 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5479 } 5480 break; 5481 5482 default: 5483 if (def->flags & TCG_OPF_VECTOR) { 5484 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5485 TCGOP_VECE(op), new_args, const_args); 5486 } else { 5487 tcg_out_op(s, op->opc, type, new_args, const_args); 5488 } 5489 break; 5490 } 5491 5492 /* move the outputs in the correct register if needed */ 5493 for(i = 0; i < nb_oargs; i++) { 5494 ts = arg_temp(op->args[i]); 5495 5496 /* ENV should not be modified. */ 5497 tcg_debug_assert(!temp_readonly(ts)); 5498 5499 if (NEED_SYNC_ARG(i)) { 5500 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5501 } else if (IS_DEAD_ARG(i)) { 5502 temp_dead(s, ts); 5503 } 5504 } 5505 } 5506 5507 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5508 { 5509 const TCGLifeData arg_life = op->life; 5510 TCGTemp *ots, *itsl, *itsh; 5511 TCGType vtype = TCGOP_TYPE(op); 5512 5513 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5514 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5515 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5516 5517 ots = arg_temp(op->args[0]); 5518 itsl = arg_temp(op->args[1]); 5519 itsh = arg_temp(op->args[2]); 5520 5521 /* ENV should not be modified. */ 5522 tcg_debug_assert(!temp_readonly(ots)); 5523 5524 /* Allocate the output register now. */ 5525 if (ots->val_type != TEMP_VAL_REG) { 5526 TCGRegSet allocated_regs = s->reserved_regs; 5527 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5528 TCGReg oreg; 5529 5530 /* Make sure to not spill the input registers. */ 5531 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5532 tcg_regset_set_reg(allocated_regs, itsl->reg); 5533 } 5534 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5535 tcg_regset_set_reg(allocated_regs, itsh->reg); 5536 } 5537 5538 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5539 output_pref(op, 0), ots->indirect_base); 5540 set_temp_val_reg(s, ots, oreg); 5541 } 5542 5543 /* Promote dup2 of immediates to dupi_vec. 
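For example (illustrative values): lo = hi = 0x00000001 combines to 0x0000000100000001, which equals dup_const(MO_32, 1), so a single dupi with vece MO_32 is emitted.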
*/ 5544 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5545 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5546 MemOp vece = MO_64; 5547 5548 if (val == dup_const(MO_8, val)) { 5549 vece = MO_8; 5550 } else if (val == dup_const(MO_16, val)) { 5551 vece = MO_16; 5552 } else if (val == dup_const(MO_32, val)) { 5553 vece = MO_32; 5554 } 5555 5556 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5557 goto done; 5558 } 5559 5560 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5561 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5562 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5563 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5564 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5565 5566 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5567 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5568 5569 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5570 its->mem_base->reg, its->mem_offset)) { 5571 goto done; 5572 } 5573 } 5574 5575 /* Fall back to generic expansion. */ 5576 return false; 5577 5578 done: 5579 ots->mem_coherent = 0; 5580 if (IS_DEAD_ARG(1)) { 5581 temp_dead(s, itsl); 5582 } 5583 if (IS_DEAD_ARG(2)) { 5584 temp_dead(s, itsh); 5585 } 5586 if (NEED_SYNC_ARG(0)) { 5587 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5588 } else if (IS_DEAD_ARG(0)) { 5589 temp_dead(s, ots); 5590 } 5591 return true; 5592 } 5593 5594 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5595 TCGRegSet allocated_regs) 5596 { 5597 if (ts->val_type == TEMP_VAL_REG) { 5598 if (ts->reg != reg) { 5599 tcg_reg_free(s, reg, allocated_regs); 5600 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5601 /* 5602 * Cross register class move not supported. Sync the 5603 * temp back to its slot and load from there. 5604 */ 5605 temp_sync(s, ts, allocated_regs, 0, 0); 5606 tcg_out_ld(s, ts->type, reg, 5607 ts->mem_base->reg, ts->mem_offset); 5608 } 5609 } 5610 } else { 5611 TCGRegSet arg_set = 0; 5612 5613 tcg_reg_free(s, reg, allocated_regs); 5614 tcg_regset_set_reg(arg_set, reg); 5615 temp_load(s, ts, arg_set, allocated_regs, 0); 5616 } 5617 } 5618 5619 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5620 TCGRegSet allocated_regs) 5621 { 5622 /* 5623 * When the destination is on the stack, load up the temp and store. 5624 * If there are many call-saved registers, the temp might live to 5625 * see another use; otherwise it'll be discarded. 
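* The temp is loaded into any available register of its type and then
* stored to the outgoing argument slot computed by arg_slot_stk_ofs().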
5626 */ 5627 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5628 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5629 arg_slot_stk_ofs(arg_slot)); 5630 } 5631 5632 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5633 TCGTemp *ts, TCGRegSet *allocated_regs) 5634 { 5635 if (arg_slot_reg_p(l->arg_slot)) { 5636 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5637 load_arg_reg(s, reg, ts, *allocated_regs); 5638 tcg_regset_set_reg(*allocated_regs, reg); 5639 } else { 5640 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5641 } 5642 } 5643 5644 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5645 intptr_t ref_off, TCGRegSet *allocated_regs) 5646 { 5647 TCGReg reg; 5648 5649 if (arg_slot_reg_p(arg_slot)) { 5650 reg = tcg_target_call_iarg_regs[arg_slot]; 5651 tcg_reg_free(s, reg, *allocated_regs); 5652 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5653 tcg_regset_set_reg(*allocated_regs, reg); 5654 } else { 5655 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5656 *allocated_regs, 0, false); 5657 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5658 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5659 arg_slot_stk_ofs(arg_slot)); 5660 } 5661 } 5662 5663 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5664 { 5665 const int nb_oargs = TCGOP_CALLO(op); 5666 const int nb_iargs = TCGOP_CALLI(op); 5667 const TCGLifeData arg_life = op->life; 5668 const TCGHelperInfo *info = tcg_call_info(op); 5669 TCGRegSet allocated_regs = s->reserved_regs; 5670 int i; 5671 5672 /* 5673 * Move inputs into place in reverse order, 5674 * so that we place stacked arguments first. 5675 */ 5676 for (i = nb_iargs - 1; i >= 0; --i) { 5677 const TCGCallArgumentLoc *loc = &info->in[i]; 5678 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5679 5680 switch (loc->kind) { 5681 case TCG_CALL_ARG_NORMAL: 5682 case TCG_CALL_ARG_EXTEND_U: 5683 case TCG_CALL_ARG_EXTEND_S: 5684 load_arg_normal(s, loc, ts, &allocated_regs); 5685 break; 5686 case TCG_CALL_ARG_BY_REF: 5687 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5688 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5689 arg_slot_stk_ofs(loc->ref_slot), 5690 &allocated_regs); 5691 break; 5692 case TCG_CALL_ARG_BY_REF_N: 5693 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5694 break; 5695 default: 5696 g_assert_not_reached(); 5697 } 5698 } 5699 5700 /* Mark dead temporaries and free the associated registers. */ 5701 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5702 if (IS_DEAD_ARG(i)) { 5703 temp_dead(s, arg_temp(op->args[i])); 5704 } 5705 } 5706 5707 /* Clobber call registers. */ 5708 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5709 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5710 tcg_reg_free(s, i, allocated_regs); 5711 } 5712 } 5713 5714 /* 5715 * Save globals if they might be written by the helper, 5716 * sync them if they might be read. 5717 */ 5718 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5719 /* Nothing to do */ 5720 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5721 sync_globals(s, allocated_regs); 5722 } else { 5723 save_globals(s, allocated_regs); 5724 } 5725 5726 /* 5727 * If the ABI passes a pointer to the returned struct as the first 5728 * argument, load that now. Pass a pointer to the output home slot. 
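* For example, a helper returning a 128-bit value by reference receives the
* address of the output temp's canonical slot here, so the callee's stores
* land directly where the temp lives in memory.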
5729 */ 5730 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5731 TCGTemp *ts = arg_temp(op->args[0]); 5732 5733 if (!ts->mem_allocated) { 5734 temp_allocate_frame(s, ts); 5735 } 5736 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5737 } 5738 5739 tcg_out_call(s, tcg_call_func(op), info); 5740 5741 /* Assign output registers and emit moves if needed. */ 5742 switch (info->out_kind) { 5743 case TCG_CALL_RET_NORMAL: 5744 for (i = 0; i < nb_oargs; i++) { 5745 TCGTemp *ts = arg_temp(op->args[i]); 5746 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5747 5748 /* ENV should not be modified. */ 5749 tcg_debug_assert(!temp_readonly(ts)); 5750 5751 set_temp_val_reg(s, ts, reg); 5752 ts->mem_coherent = 0; 5753 } 5754 break; 5755 5756 case TCG_CALL_RET_BY_VEC: 5757 { 5758 TCGTemp *ts = arg_temp(op->args[0]); 5759 5760 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5761 tcg_debug_assert(ts->temp_subindex == 0); 5762 if (!ts->mem_allocated) { 5763 temp_allocate_frame(s, ts); 5764 } 5765 tcg_out_st(s, TCG_TYPE_V128, 5766 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5767 ts->mem_base->reg, ts->mem_offset); 5768 } 5769 /* fall through to mark all parts in memory */ 5770 5771 case TCG_CALL_RET_BY_REF: 5772 /* The callee has performed a write through the reference. */ 5773 for (i = 0; i < nb_oargs; i++) { 5774 TCGTemp *ts = arg_temp(op->args[i]); 5775 ts->val_type = TEMP_VAL_MEM; 5776 } 5777 break; 5778 5779 default: 5780 g_assert_not_reached(); 5781 } 5782 5783 /* Flush or discard output registers as needed. */ 5784 for (i = 0; i < nb_oargs; i++) { 5785 TCGTemp *ts = arg_temp(op->args[i]); 5786 if (NEED_SYNC_ARG(i)) { 5787 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5788 } else if (IS_DEAD_ARG(i)) { 5789 temp_dead(s, ts); 5790 } 5791 } 5792 } 5793 5794 /** 5795 * atom_and_align_for_opc: 5796 * @s: tcg context 5797 * @opc: memory operation code 5798 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5799 * @allow_two_ops: true if we are prepared to issue two operations 5800 * 5801 * Return the alignment and atomicity to use for the inline fast path 5802 * for the given memory operation. The alignment may be larger than 5803 * that specified in @opc, and the correct alignment will be diagnosed 5804 * by the slow path helper. 5805 * 5806 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5807 * and issue two loads or stores for subalignment. 5808 */ 5809 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5810 MemOp host_atom, bool allow_two_ops) 5811 { 5812 MemOp align = memop_alignment_bits(opc); 5813 MemOp size = opc & MO_SIZE; 5814 MemOp half = size ? size - 1 : 0; 5815 MemOp atom = opc & MO_ATOM_MASK; 5816 MemOp atmax; 5817 5818 switch (atom) { 5819 case MO_ATOM_NONE: 5820 /* The operation requires no specific atomicity. */ 5821 atmax = MO_8; 5822 break; 5823 5824 case MO_ATOM_IFALIGN: 5825 atmax = size; 5826 break; 5827 5828 case MO_ATOM_IFALIGN_PAIR: 5829 atmax = half; 5830 break; 5831 5832 case MO_ATOM_WITHIN16: 5833 atmax = size; 5834 if (size == MO_128) { 5835 /* Misalignment implies !within16, and therefore no atomicity. */ 5836 } else if (host_atom != MO_ATOM_WITHIN16) { 5837 /* The host does not implement within16, so require alignment. */ 5838 align = MAX(align, size); 5839 } 5840 break; 5841 5842 case MO_ATOM_WITHIN16_PAIR: 5843 atmax = size; 5844 /* 5845 * Misalignment implies !within16, and therefore half atomicity. 
5846 * Any host prepared for two operations can implement this with 5847 * half alignment. 5848 */ 5849 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5850 align = MAX(align, half); 5851 } 5852 break; 5853 5854 case MO_ATOM_SUBALIGN: 5855 atmax = size; 5856 if (host_atom != MO_ATOM_SUBALIGN) { 5857 /* If unaligned but not odd, there are subobjects up to half. */ 5858 if (allow_two_ops) { 5859 align = MAX(align, half); 5860 } else { 5861 align = MAX(align, size); 5862 } 5863 } 5864 break; 5865 5866 default: 5867 g_assert_not_reached(); 5868 } 5869 5870 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5871 } 5872 5873 /* 5874 * Similarly for qemu_ld/st slow path helpers. 5875 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5876 * using only the provided backend tcg_out_* functions. 5877 */ 5878 5879 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5880 { 5881 int ofs = arg_slot_stk_ofs(slot); 5882 5883 /* 5884 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5885 * require extension to uint64_t, adjust the address for uint32_t. 5886 */ 5887 if (HOST_BIG_ENDIAN && 5888 TCG_TARGET_REG_BITS == 64 && 5889 type == TCG_TYPE_I32) { 5890 ofs += 4; 5891 } 5892 return ofs; 5893 } 5894 5895 static void tcg_out_helper_load_slots(TCGContext *s, 5896 unsigned nmov, TCGMovExtend *mov, 5897 const TCGLdstHelperParam *parm) 5898 { 5899 unsigned i; 5900 TCGReg dst3; 5901 5902 /* 5903 * Start from the end, storing to the stack first. 5904 * This frees those registers, so we need not consider overlap. 5905 */ 5906 for (i = nmov; i-- > 0; ) { 5907 unsigned slot = mov[i].dst; 5908 5909 if (arg_slot_reg_p(slot)) { 5910 goto found_reg; 5911 } 5912 5913 TCGReg src = mov[i].src; 5914 TCGType dst_type = mov[i].dst_type; 5915 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5916 5917 /* The argument is going onto the stack; extend into scratch. */ 5918 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5919 tcg_debug_assert(parm->ntmp != 0); 5920 mov[i].dst = src = parm->tmp[0]; 5921 tcg_out_movext1(s, &mov[i]); 5922 } 5923 5924 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5925 tcg_out_helper_stk_ofs(dst_type, slot)); 5926 } 5927 return; 5928 5929 found_reg: 5930 /* 5931 * The remaining arguments are in registers. 5932 * Convert slot numbers to argument registers. 5933 */ 5934 nmov = i + 1; 5935 for (i = 0; i < nmov; ++i) { 5936 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5937 } 5938 5939 switch (nmov) { 5940 case 4: 5941 /* The backend must have provided enough temps for the worst case. */ 5942 tcg_debug_assert(parm->ntmp >= 2); 5943 5944 dst3 = mov[3].dst; 5945 for (unsigned j = 0; j < 3; ++j) { 5946 if (dst3 == mov[j].src) { 5947 /* 5948 * Conflict. Copy the source to a temporary, perform the 5949 * remaining moves, then the extension from our scratch 5950 * on the way out. 5951 */ 5952 TCGReg scratch = parm->tmp[1]; 5953 5954 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5955 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5956 tcg_out_movext1_new_src(s, &mov[3], scratch); 5957 break; 5958 } 5959 } 5960 5961 /* No conflicts: perform this move and continue. */ 5962 tcg_out_movext1(s, &mov[3]); 5963 /* fall through */ 5964 5965 case 3: 5966 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5967 parm->ntmp ? parm->tmp[0] : -1); 5968 break; 5969 case 2: 5970 tcg_out_movext2(s, mov, mov + 1, 5971 parm->ntmp ? 
parm->tmp[0] : -1); 5972 break; 5973 case 1: 5974 tcg_out_movext1(s, mov); 5975 break; 5976 default: 5977 g_assert_not_reached(); 5978 } 5979 } 5980 5981 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5982 TCGType type, tcg_target_long imm, 5983 const TCGLdstHelperParam *parm) 5984 { 5985 if (arg_slot_reg_p(slot)) { 5986 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5987 } else { 5988 int ofs = tcg_out_helper_stk_ofs(type, slot); 5989 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5990 tcg_debug_assert(parm->ntmp != 0); 5991 tcg_out_movi(s, type, parm->tmp[0], imm); 5992 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5993 } 5994 } 5995 } 5996 5997 static void tcg_out_helper_load_common_args(TCGContext *s, 5998 const TCGLabelQemuLdst *ldst, 5999 const TCGLdstHelperParam *parm, 6000 const TCGHelperInfo *info, 6001 unsigned next_arg) 6002 { 6003 TCGMovExtend ptr_mov = { 6004 .dst_type = TCG_TYPE_PTR, 6005 .src_type = TCG_TYPE_PTR, 6006 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6007 }; 6008 const TCGCallArgumentLoc *loc = &info->in[0]; 6009 TCGType type; 6010 unsigned slot; 6011 tcg_target_ulong imm; 6012 6013 /* 6014 * Handle env, which is always first. 6015 */ 6016 ptr_mov.dst = loc->arg_slot; 6017 ptr_mov.src = TCG_AREG0; 6018 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6019 6020 /* 6021 * Handle oi. 6022 */ 6023 imm = ldst->oi; 6024 loc = &info->in[next_arg]; 6025 type = TCG_TYPE_I32; 6026 switch (loc->kind) { 6027 case TCG_CALL_ARG_NORMAL: 6028 break; 6029 case TCG_CALL_ARG_EXTEND_U: 6030 case TCG_CALL_ARG_EXTEND_S: 6031 /* No extension required for MemOpIdx. */ 6032 tcg_debug_assert(imm <= INT32_MAX); 6033 type = TCG_TYPE_REG; 6034 break; 6035 default: 6036 g_assert_not_reached(); 6037 } 6038 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6039 next_arg++; 6040 6041 /* 6042 * Handle ra. 6043 */ 6044 loc = &info->in[next_arg]; 6045 slot = loc->arg_slot; 6046 if (parm->ra_gen) { 6047 int arg_reg = -1; 6048 TCGReg ra_reg; 6049 6050 if (arg_slot_reg_p(slot)) { 6051 arg_reg = tcg_target_call_iarg_regs[slot]; 6052 } 6053 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6054 6055 ptr_mov.dst = slot; 6056 ptr_mov.src = ra_reg; 6057 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6058 } else { 6059 imm = (uintptr_t)ldst->raddr; 6060 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6061 } 6062 } 6063 6064 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6065 const TCGCallArgumentLoc *loc, 6066 TCGType dst_type, TCGType src_type, 6067 TCGReg lo, TCGReg hi) 6068 { 6069 MemOp reg_mo; 6070 6071 if (dst_type <= TCG_TYPE_REG) { 6072 MemOp src_ext; 6073 6074 switch (loc->kind) { 6075 case TCG_CALL_ARG_NORMAL: 6076 src_ext = src_type == TCG_TYPE_I32 ? 
MO_32 : MO_64; 6077 break; 6078 case TCG_CALL_ARG_EXTEND_U: 6079 dst_type = TCG_TYPE_REG; 6080 src_ext = MO_UL; 6081 break; 6082 case TCG_CALL_ARG_EXTEND_S: 6083 dst_type = TCG_TYPE_REG; 6084 src_ext = MO_SL; 6085 break; 6086 default: 6087 g_assert_not_reached(); 6088 } 6089 6090 mov[0].dst = loc->arg_slot; 6091 mov[0].dst_type = dst_type; 6092 mov[0].src = lo; 6093 mov[0].src_type = src_type; 6094 mov[0].src_ext = src_ext; 6095 return 1; 6096 } 6097 6098 if (TCG_TARGET_REG_BITS == 32) { 6099 assert(dst_type == TCG_TYPE_I64); 6100 reg_mo = MO_32; 6101 } else { 6102 assert(dst_type == TCG_TYPE_I128); 6103 reg_mo = MO_64; 6104 } 6105 6106 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6107 mov[0].src = lo; 6108 mov[0].dst_type = TCG_TYPE_REG; 6109 mov[0].src_type = TCG_TYPE_REG; 6110 mov[0].src_ext = reg_mo; 6111 6112 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6113 mov[1].src = hi; 6114 mov[1].dst_type = TCG_TYPE_REG; 6115 mov[1].src_type = TCG_TYPE_REG; 6116 mov[1].src_ext = reg_mo; 6117 6118 return 2; 6119 } 6120 6121 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6122 const TCGLdstHelperParam *parm) 6123 { 6124 const TCGHelperInfo *info; 6125 const TCGCallArgumentLoc *loc; 6126 TCGMovExtend mov[2]; 6127 unsigned next_arg, nmov; 6128 MemOp mop = get_memop(ldst->oi); 6129 6130 switch (mop & MO_SIZE) { 6131 case MO_8: 6132 case MO_16: 6133 case MO_32: 6134 info = &info_helper_ld32_mmu; 6135 break; 6136 case MO_64: 6137 info = &info_helper_ld64_mmu; 6138 break; 6139 case MO_128: 6140 info = &info_helper_ld128_mmu; 6141 break; 6142 default: 6143 g_assert_not_reached(); 6144 } 6145 6146 /* Defer env argument. */ 6147 next_arg = 1; 6148 6149 loc = &info->in[next_arg]; 6150 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6151 /* 6152 * 32-bit host with 32-bit guest: zero-extend the guest address 6153 * to 64-bits for the helper by storing the low part, then 6154 * load a zero for the high part. 6155 */ 6156 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6157 TCG_TYPE_I32, TCG_TYPE_I32, 6158 ldst->addr_reg, -1); 6159 tcg_out_helper_load_slots(s, 1, mov, parm); 6160 6161 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6162 TCG_TYPE_I32, 0, parm); 6163 next_arg += 2; 6164 } else { 6165 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6166 ldst->addr_reg, -1); 6167 tcg_out_helper_load_slots(s, nmov, mov, parm); 6168 next_arg += nmov; 6169 } 6170 6171 switch (info->out_kind) { 6172 case TCG_CALL_RET_NORMAL: 6173 case TCG_CALL_RET_BY_VEC: 6174 break; 6175 case TCG_CALL_RET_BY_REF: 6176 /* 6177 * The return reference is in the first argument slot. 6178 * We need memory in which to return: re-use the top of stack. 
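* As an illustrative sketch (not tied to any particular backend), when argument slot 0 lives in a register the block below amounts to
*   tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], TCG_REG_CALL_STACK, ofs_slot0);
* i.e. pass &stack[ofs_slot0] as the return reference; the helper writes the 128-bit result there and tcg_out_ld_helper_ret() reloads the two halves from those same stack words.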
6179 */ 6180 { 6181 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6182 6183 if (arg_slot_reg_p(0)) { 6184 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6185 TCG_REG_CALL_STACK, ofs_slot0); 6186 } else { 6187 tcg_debug_assert(parm->ntmp != 0); 6188 tcg_out_addi_ptr(s, parm->tmp[0], 6189 TCG_REG_CALL_STACK, ofs_slot0); 6190 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6191 TCG_REG_CALL_STACK, ofs_slot0); 6192 } 6193 } 6194 break; 6195 default: 6196 g_assert_not_reached(); 6197 } 6198 6199 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6200 } 6201 6202 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6203 bool load_sign, 6204 const TCGLdstHelperParam *parm) 6205 { 6206 MemOp mop = get_memop(ldst->oi); 6207 TCGMovExtend mov[2]; 6208 int ofs_slot0; 6209 6210 switch (ldst->type) { 6211 case TCG_TYPE_I64: 6212 if (TCG_TARGET_REG_BITS == 32) { 6213 break; 6214 } 6215 /* fall through */ 6216 6217 case TCG_TYPE_I32: 6218 mov[0].dst = ldst->datalo_reg; 6219 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6220 mov[0].dst_type = ldst->type; 6221 mov[0].src_type = TCG_TYPE_REG; 6222 6223 /* 6224 * If load_sign, then we allowed the helper to perform the 6225 * appropriate sign extension to tcg_target_ulong, and all 6226 * we need now is a plain move. 6227 * 6228 * If they do not, then we expect the relevant extension 6229 * instruction to be no more expensive than a move, and 6230 * we thus save the icache etc by only using one of two 6231 * helper functions. 6232 */ 6233 if (load_sign || !(mop & MO_SIGN)) { 6234 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6235 mov[0].src_ext = MO_32; 6236 } else { 6237 mov[0].src_ext = MO_64; 6238 } 6239 } else { 6240 mov[0].src_ext = mop & MO_SSIZE; 6241 } 6242 tcg_out_movext1(s, mov); 6243 return; 6244 6245 case TCG_TYPE_I128: 6246 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6247 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6248 switch (TCG_TARGET_CALL_RET_I128) { 6249 case TCG_CALL_RET_NORMAL: 6250 break; 6251 case TCG_CALL_RET_BY_VEC: 6252 tcg_out_st(s, TCG_TYPE_V128, 6253 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6254 TCG_REG_CALL_STACK, ofs_slot0); 6255 /* fall through */ 6256 case TCG_CALL_RET_BY_REF: 6257 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6258 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6259 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6260 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6261 return; 6262 default: 6263 g_assert_not_reached(); 6264 } 6265 break; 6266 6267 default: 6268 g_assert_not_reached(); 6269 } 6270 6271 mov[0].dst = ldst->datalo_reg; 6272 mov[0].src = 6273 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6274 mov[0].dst_type = TCG_TYPE_REG; 6275 mov[0].src_type = TCG_TYPE_REG; 6276 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6277 6278 mov[1].dst = ldst->datahi_reg; 6279 mov[1].src = 6280 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6281 mov[1].dst_type = TCG_TYPE_REG; 6282 mov[1].src_type = TCG_TYPE_REG; 6283 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6284 6285 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6286 } 6287 6288 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6289 const TCGLdstHelperParam *parm) 6290 { 6291 const TCGHelperInfo *info; 6292 const TCGCallArgumentLoc *loc; 6293 TCGMovExtend mov[4]; 6294 TCGType data_type; 6295 unsigned next_arg, nmov, n; 6296 MemOp mop = get_memop(ldst->oi); 6297 6298 switch (mop & MO_SIZE) { 6299 case MO_8: 6300 case MO_16: 6301 case MO_32: 6302 info = &info_helper_st32_mmu; 6303 data_type = TCG_TYPE_I32; 6304 break; 6305 case MO_64: 6306 info = &info_helper_st64_mmu; 6307 data_type = TCG_TYPE_I64; 6308 break; 6309 case MO_128: 6310 info = &info_helper_st128_mmu; 6311 data_type = TCG_TYPE_I128; 6312 break; 6313 default: 6314 g_assert_not_reached(); 6315 } 6316 6317 /* Defer env argument. */ 6318 next_arg = 1; 6319 nmov = 0; 6320 6321 /* Handle addr argument. */ 6322 loc = &info->in[next_arg]; 6323 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6324 if (TCG_TARGET_REG_BITS == 32) { 6325 /* 6326 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6327 * to 64-bits for the helper by storing the low part. Later, 6328 * after we have processed the register inputs, we will load a 6329 * zero for the high part. 6330 */ 6331 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6332 TCG_TYPE_I32, TCG_TYPE_I32, 6333 ldst->addr_reg, -1); 6334 next_arg += 2; 6335 nmov += 1; 6336 } else { 6337 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6338 ldst->addr_reg, -1); 6339 next_arg += n; 6340 nmov += n; 6341 } 6342 6343 /* Handle data argument. */ 6344 loc = &info->in[next_arg]; 6345 switch (loc->kind) { 6346 case TCG_CALL_ARG_NORMAL: 6347 case TCG_CALL_ARG_EXTEND_U: 6348 case TCG_CALL_ARG_EXTEND_S: 6349 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6350 ldst->datalo_reg, ldst->datahi_reg); 6351 next_arg += n; 6352 nmov += n; 6353 tcg_out_helper_load_slots(s, nmov, mov, parm); 6354 break; 6355 6356 case TCG_CALL_ARG_BY_REF: 6357 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6358 tcg_debug_assert(data_type == TCG_TYPE_I128); 6359 tcg_out_st(s, TCG_TYPE_I64, 6360 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6361 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6362 tcg_out_st(s, TCG_TYPE_I64, 6363 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6364 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6365 6366 tcg_out_helper_load_slots(s, nmov, mov, parm); 6367 6368 if (arg_slot_reg_p(loc->arg_slot)) { 6369 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6370 TCG_REG_CALL_STACK, 6371 arg_slot_stk_ofs(loc->ref_slot)); 6372 } else { 6373 tcg_debug_assert(parm->ntmp != 0); 6374 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6375 arg_slot_stk_ofs(loc->ref_slot)); 6376 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6377 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6378 } 6379 next_arg += 2; 6380 break; 6381 6382 default: 6383 g_assert_not_reached(); 6384 } 6385 6386 if (TCG_TARGET_REG_BITS == 32) { 6387 /* Zero extend the address by loading a zero for the high part. 
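For example, on a little-endian host the 32-bit guest address was stored into in[1] above and in[2] now receives the zero; on a big-endian host the two argument slots are swapped.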
*/ 6388 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6389 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6390 } 6391 6392 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6393 } 6394 6395 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6396 { 6397 int i, start_words, num_insns; 6398 TCGOp *op; 6399 6400 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6401 && qemu_log_in_addr_range(pc_start))) { 6402 FILE *logfile = qemu_log_trylock(); 6403 if (logfile) { 6404 fprintf(logfile, "OP:\n"); 6405 tcg_dump_ops(s, logfile, false); 6406 fprintf(logfile, "\n"); 6407 qemu_log_unlock(logfile); 6408 } 6409 } 6410 6411 #ifdef CONFIG_DEBUG_TCG 6412 /* Ensure all labels referenced have been emitted. */ 6413 { 6414 TCGLabel *l; 6415 bool error = false; 6416 6417 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6418 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6419 qemu_log_mask(CPU_LOG_TB_OP, 6420 "$L%d referenced but not present.\n", l->id); 6421 error = true; 6422 } 6423 } 6424 assert(!error); 6425 } 6426 #endif 6427 6428 /* Do not reuse any EBB that may be allocated within the TB. */ 6429 tcg_temp_ebb_reset_freed(s); 6430 6431 tcg_optimize(s); 6432 6433 reachable_code_pass(s); 6434 liveness_pass_0(s); 6435 liveness_pass_1(s); 6436 6437 if (s->nb_indirects > 0) { 6438 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6439 && qemu_log_in_addr_range(pc_start))) { 6440 FILE *logfile = qemu_log_trylock(); 6441 if (logfile) { 6442 fprintf(logfile, "OP before indirect lowering:\n"); 6443 tcg_dump_ops(s, logfile, false); 6444 fprintf(logfile, "\n"); 6445 qemu_log_unlock(logfile); 6446 } 6447 } 6448 6449 /* Replace indirect temps with direct temps. */ 6450 if (liveness_pass_2(s)) { 6451 /* If changes were made, re-run liveness. */ 6452 liveness_pass_1(s); 6453 } 6454 } 6455 6456 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6457 && qemu_log_in_addr_range(pc_start))) { 6458 FILE *logfile = qemu_log_trylock(); 6459 if (logfile) { 6460 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6461 tcg_dump_ops(s, logfile, true); 6462 fprintf(logfile, "\n"); 6463 qemu_log_unlock(logfile); 6464 } 6465 } 6466 6467 /* Initialize goto_tb jump offsets. */ 6468 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6469 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6470 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6471 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6472 6473 tcg_reg_alloc_start(s); 6474 6475 /* 6476 * Reset the buffer pointers when restarting after overflow. 6477 * TODO: Move this into translate-all.c with the rest of the 6478 * buffer management. Having only this done here is confusing. 6479 */ 6480 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6481 s->code_ptr = s->code_buf; 6482 s->data_gen_ptr = NULL; 6483 6484 QSIMPLEQ_INIT(&s->ldst_labels); 6485 s->pool_labels = NULL; 6486 6487 start_words = s->insn_start_words; 6488 s->gen_insn_data = 6489 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6490 6491 tcg_out_tb_start(s); 6492 6493 num_insns = -1; 6494 QTAILQ_FOREACH(op, &s->ops, link) { 6495 TCGOpcode opc = op->opc; 6496 6497 switch (opc) { 6498 case INDEX_op_mov: 6499 case INDEX_op_mov_vec: 6500 tcg_reg_alloc_mov(s, op); 6501 break; 6502 case INDEX_op_dup_vec: 6503 tcg_reg_alloc_dup(s, op); 6504 break; 6505 case INDEX_op_insn_start: 6506 if (num_insns >= 0) { 6507 size_t off = tcg_current_code_size(s); 6508 s->gen_insn_end_off[num_insns] = off; 6509 /* Assert that we do not overflow our stored offset. 
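The stored offsets are only 16 bits wide; the TB-size check against UINT16_MAX after each op below is what keeps this assert from firing.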
*/ 6510 assert(s->gen_insn_end_off[num_insns] == off); 6511 } 6512 num_insns++; 6513 for (i = 0; i < start_words; ++i) { 6514 s->gen_insn_data[num_insns * start_words + i] = 6515 tcg_get_insn_start_param(op, i); 6516 } 6517 break; 6518 case INDEX_op_discard: 6519 temp_dead(s, arg_temp(op->args[0])); 6520 break; 6521 case INDEX_op_set_label: 6522 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6523 tcg_out_label(s, arg_label(op->args[0])); 6524 break; 6525 case INDEX_op_call: 6526 tcg_reg_alloc_call(s, op); 6527 break; 6528 case INDEX_op_exit_tb: 6529 tcg_out_exit_tb(s, op->args[0]); 6530 break; 6531 case INDEX_op_goto_tb: 6532 tcg_out_goto_tb(s, op->args[0]); 6533 break; 6534 case INDEX_op_dup2_vec: 6535 if (tcg_reg_alloc_dup2(s, op)) { 6536 break; 6537 } 6538 /* fall through */ 6539 default: 6540 /* Sanity check that we've not introduced any unhandled opcodes. */ 6541 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6542 TCGOP_FLAGS(op))); 6543 /* Note: in order to speed up the code, it would be much 6544 faster to have specialized register allocator functions for 6545 some common argument patterns */ 6546 tcg_reg_alloc_op(s, op); 6547 break; 6548 } 6549 /* Test for (pending) buffer overflow. The assumption is that any 6550 one operation beginning below the high water mark cannot overrun 6551 the buffer completely. Thus we can test for overflow after 6552 generating code without having to check during generation. */ 6553 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6554 return -1; 6555 } 6556 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6557 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6558 return -2; 6559 } 6560 } 6561 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6562 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6563 6564 /* Generate TB finalization at the end of block */ 6565 i = tcg_out_ldst_finalize(s); 6566 if (i < 0) { 6567 return i; 6568 } 6569 i = tcg_out_pool_finalize(s); 6570 if (i < 0) { 6571 return i; 6572 } 6573 if (!tcg_resolve_relocs(s)) { 6574 return -2; 6575 } 6576 6577 #ifndef CONFIG_TCG_INTERPRETER 6578 /* flush instruction cache */ 6579 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6580 (uintptr_t)s->code_buf, 6581 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6582 #endif 6583 6584 return tcg_current_code_size(s); 6585 } 6586 6587 #ifdef ELF_HOST_MACHINE 6588 /* In order to use this feature, the backend needs to do three things: 6589 6590 (1) Define ELF_HOST_MACHINE to indicate both what value to 6591 put into the ELF image and to indicate support for the feature. 6592 6593 (2) Define tcg_register_jit. This should create a buffer containing 6594 the contents of a .debug_frame section that describes the post- 6595 prologue unwind info for the tcg machine. 6596 6597 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6598 */ 6599 6600 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. 
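For reference, registration (as performed at the end of tcg_register_jit_int below) follows the documented GDB JIT protocol:
     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
     __jit_debug_descriptor.relevant_entry = entry;
     __jit_debug_descriptor.first_entry = entry;
     __jit_debug_register_code();
where entry is a jit_code_entry describing the in-memory ELF image, and GDB places a breakpoint on __jit_debug_register_code() so that it can read the image when the call is made.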
*/ 6601 typedef enum { 6602 JIT_NOACTION = 0, 6603 JIT_REGISTER_FN, 6604 JIT_UNREGISTER_FN 6605 } jit_actions_t; 6606 6607 struct jit_code_entry { 6608 struct jit_code_entry *next_entry; 6609 struct jit_code_entry *prev_entry; 6610 const void *symfile_addr; 6611 uint64_t symfile_size; 6612 }; 6613 6614 struct jit_descriptor { 6615 uint32_t version; 6616 uint32_t action_flag; 6617 struct jit_code_entry *relevant_entry; 6618 struct jit_code_entry *first_entry; 6619 }; 6620 6621 void __jit_debug_register_code(void) __attribute__((noinline)); 6622 void __jit_debug_register_code(void) 6623 { 6624 asm(""); 6625 } 6626 6627 /* Must statically initialize the version, because GDB may check 6628 the version before we can set it. */ 6629 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6630 6631 /* End GDB interface. */ 6632 6633 static int find_string(const char *strtab, const char *str) 6634 { 6635 const char *p = strtab + 1; 6636 6637 while (1) { 6638 if (strcmp(p, str) == 0) { 6639 return p - strtab; 6640 } 6641 p += strlen(p) + 1; 6642 } 6643 } 6644 6645 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 6646 const void *debug_frame, 6647 size_t debug_frame_size) 6648 { 6649 struct __attribute__((packed)) DebugInfo { 6650 uint32_t len; 6651 uint16_t version; 6652 uint32_t abbrev; 6653 uint8_t ptr_size; 6654 uint8_t cu_die; 6655 uint16_t cu_lang; 6656 uintptr_t cu_low_pc; 6657 uintptr_t cu_high_pc; 6658 uint8_t fn_die; 6659 char fn_name[16]; 6660 uintptr_t fn_low_pc; 6661 uintptr_t fn_high_pc; 6662 uint8_t cu_eoc; 6663 }; 6664 6665 struct ElfImage { 6666 ElfW(Ehdr) ehdr; 6667 ElfW(Phdr) phdr; 6668 ElfW(Shdr) shdr[7]; 6669 ElfW(Sym) sym[2]; 6670 struct DebugInfo di; 6671 uint8_t da[24]; 6672 char str[80]; 6673 }; 6674 6675 struct ElfImage *img; 6676 6677 static const struct ElfImage img_template = { 6678 .ehdr = { 6679 .e_ident[EI_MAG0] = ELFMAG0, 6680 .e_ident[EI_MAG1] = ELFMAG1, 6681 .e_ident[EI_MAG2] = ELFMAG2, 6682 .e_ident[EI_MAG3] = ELFMAG3, 6683 .e_ident[EI_CLASS] = ELF_CLASS, 6684 .e_ident[EI_DATA] = ELF_DATA, 6685 .e_ident[EI_VERSION] = EV_CURRENT, 6686 .e_type = ET_EXEC, 6687 .e_machine = ELF_HOST_MACHINE, 6688 .e_version = EV_CURRENT, 6689 .e_phoff = offsetof(struct ElfImage, phdr), 6690 .e_shoff = offsetof(struct ElfImage, shdr), 6691 .e_ehsize = sizeof(ElfW(Shdr)), 6692 .e_phentsize = sizeof(ElfW(Phdr)), 6693 .e_phnum = 1, 6694 .e_shentsize = sizeof(ElfW(Shdr)), 6695 .e_shnum = ARRAY_SIZE(img->shdr), 6696 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 6697 #ifdef ELF_HOST_FLAGS 6698 .e_flags = ELF_HOST_FLAGS, 6699 #endif 6700 #ifdef ELF_OSABI 6701 .e_ident[EI_OSABI] = ELF_OSABI, 6702 #endif 6703 }, 6704 .phdr = { 6705 .p_type = PT_LOAD, 6706 .p_flags = PF_X, 6707 }, 6708 .shdr = { 6709 [0] = { .sh_type = SHT_NULL }, 6710 /* Trick: The contents of code_gen_buffer are not present in 6711 this fake ELF file; that got allocated elsewhere. Therefore 6712 we mark .text as SHT_NOBITS (similar to .bss) so that readers 6713 will not look for contents. We can record any address. 
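The real address and size are patched into sh_addr and sh_size (and into the symbol value and the DWARF low_pc/high_pc fields) by tcg_register_jit_int() once the buffer is known.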
*/ 6714 [1] = { /* .text */ 6715 .sh_type = SHT_NOBITS, 6716 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 6717 }, 6718 [2] = { /* .debug_info */ 6719 .sh_type = SHT_PROGBITS, 6720 .sh_offset = offsetof(struct ElfImage, di), 6721 .sh_size = sizeof(struct DebugInfo), 6722 }, 6723 [3] = { /* .debug_abbrev */ 6724 .sh_type = SHT_PROGBITS, 6725 .sh_offset = offsetof(struct ElfImage, da), 6726 .sh_size = sizeof(img->da), 6727 }, 6728 [4] = { /* .debug_frame */ 6729 .sh_type = SHT_PROGBITS, 6730 .sh_offset = sizeof(struct ElfImage), 6731 }, 6732 [5] = { /* .symtab */ 6733 .sh_type = SHT_SYMTAB, 6734 .sh_offset = offsetof(struct ElfImage, sym), 6735 .sh_size = sizeof(img->sym), 6736 .sh_info = 1, 6737 .sh_link = ARRAY_SIZE(img->shdr) - 1, 6738 .sh_entsize = sizeof(ElfW(Sym)), 6739 }, 6740 [6] = { /* .strtab */ 6741 .sh_type = SHT_STRTAB, 6742 .sh_offset = offsetof(struct ElfImage, str), 6743 .sh_size = sizeof(img->str), 6744 } 6745 }, 6746 .sym = { 6747 [1] = { /* code_gen_buffer */ 6748 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 6749 .st_shndx = 1, 6750 } 6751 }, 6752 .di = { 6753 .len = sizeof(struct DebugInfo) - 4, 6754 .version = 2, 6755 .ptr_size = sizeof(void *), 6756 .cu_die = 1, 6757 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 6758 .fn_die = 2, 6759 .fn_name = "code_gen_buffer" 6760 }, 6761 .da = { 6762 1, /* abbrev number (the cu) */ 6763 0x11, 1, /* DW_TAG_compile_unit, has children */ 6764 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 6765 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6766 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6767 0, 0, /* end of abbrev */ 6768 2, /* abbrev number (the fn) */ 6769 0x2e, 0, /* DW_TAG_subprogram, no children */ 6770 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 6771 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 6772 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 6773 0, 0, /* end of abbrev */ 6774 0 /* no more abbrev */ 6775 }, 6776 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 6777 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 6778 }; 6779 6780 /* We only need a single jit entry; statically allocate it. */ 6781 static struct jit_code_entry one_entry; 6782 6783 uintptr_t buf = (uintptr_t)buf_ptr; 6784 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 6785 DebugFrameHeader *dfh; 6786 6787 img = g_malloc(img_size); 6788 *img = img_template; 6789 6790 img->phdr.p_vaddr = buf; 6791 img->phdr.p_paddr = buf; 6792 img->phdr.p_memsz = buf_size; 6793 6794 img->shdr[1].sh_name = find_string(img->str, ".text"); 6795 img->shdr[1].sh_addr = buf; 6796 img->shdr[1].sh_size = buf_size; 6797 6798 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 6799 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 6800 6801 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 6802 img->shdr[4].sh_size = debug_frame_size; 6803 6804 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 6805 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 6806 6807 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 6808 img->sym[1].st_value = buf; 6809 img->sym[1].st_size = buf_size; 6810 6811 img->di.cu_low_pc = buf; 6812 img->di.cu_high_pc = buf + buf_size; 6813 img->di.fn_low_pc = buf; 6814 img->di.fn_high_pc = buf + buf_size; 6815 6816 dfh = (DebugFrameHeader *)(img + 1); 6817 memcpy(dfh, debug_frame, debug_frame_size); 6818 dfh->fde.func_start = buf; 6819 dfh->fde.func_len = buf_size; 6820 6821 #ifdef DEBUG_JIT 6822 /* Enable this block to be able to debug the ELF image file creation. 
6823 One can use readelf, objdump, or other inspection utilities. */ 6824 { 6825 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 6826 FILE *f = fopen(jit, "w+b"); 6827 if (f) { 6828 if (fwrite(img, img_size, 1, f) != 1) { 6829 /* Avoid stupid unused return value warning for fwrite. */ 6830 } 6831 fclose(f); 6832 } 6833 } 6834 #endif 6835 6836 one_entry.symfile_addr = img; 6837 one_entry.symfile_size = img_size; 6838 6839 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 6840 __jit_debug_descriptor.relevant_entry = &one_entry; 6841 __jit_debug_descriptor.first_entry = &one_entry; 6842 __jit_debug_register_code(); 6843 } 6844 #else 6845 /* No support for the feature. Provide the entry point expected by exec.c, 6846 and implement the internal function we declared earlier. */ 6847 6848 static void tcg_register_jit_int(const void *buf, size_t size, 6849 const void *debug_frame, 6850 size_t debug_frame_size) 6851 { 6852 } 6853 6854 void tcg_register_jit(const void *buf, size_t buf_size) 6855 { 6856 } 6857 #endif /* ELF_HOST_MACHINE */ 6858 6859 #if !TCG_TARGET_MAYBE_vec 6860 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 6861 { 6862 g_assert_not_reached(); 6863 } 6864 #endif 6865